{"id":"https://openalex.org/W4402811916","doi":"https://doi.org/10.1109/csr61664.2024.10679494","title":"CyberMetric: A Benchmark Dataset based on Retrieval-Augmented Generation for Evaluating LLMs in Cybersecurity Knowledge","display_name":"CyberMetric: A Benchmark Dataset based on Retrieval-Augmented Generation for Evaluating LLMs in Cybersecurity Knowledge","publication_year":2024,"publication_date":"2024-09-02","ids":{"openalex":"https://openalex.org/W4402811916","doi":"https://doi.org/10.1109/csr61664.2024.10679494"},"language":"en","primary_location":{"id":"doi:10.1109/csr61664.2024.10679494","is_oa":false,"landing_page_url":"https://doi.org/10.1109/csr61664.2024.10679494","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Cyber Security and Resilience (CSR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073042391","display_name":"Norbert Tihanyi","orcid":"https://orcid.org/0000-0002-9002-5935"},"institutions":[{"id":"https://openalex.org/I4210087059","display_name":"Technology Innovation Institute","ror":"https://ror.org/001kv2y39","country_code":"AE","type":"facility","lineage":["https://openalex.org/I4210087059"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Norbert Tihanyi","raw_affiliation_strings":["Technology Innovation Institute (TII),Abu Dhabi,United Arab Emirates"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technology Innovation Institute (TII),Abu Dhabi,United Arab Emirates","institution_ids":["https://openalex.org/I4210087059"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026903935","display_name":"Mohamed Amine Ferrag","orcid":"https://orcid.org/0000-0002-0632-3172"},"institutions":[{"id":"https://openalex.org/I4210087059","display_name":"Technology Innovation Institute","ror":"https://ror.org/001kv2y39","country_code":"AE","type":"facility","lineage":["https://openalex.org/I4210087059"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Mohamed Amine Ferrag","raw_affiliation_strings":["Technology Innovation Institute (TII),Abu Dhabi,United Arab Emirates"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technology Innovation Institute (TII),Abu Dhabi,United Arab Emirates","institution_ids":["https://openalex.org/I4210087059"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102012126","display_name":"Ridhi Jain","orcid":"https://orcid.org/0000-0002-6102-7114"},"institutions":[{"id":"https://openalex.org/I4210087059","display_name":"Technology Innovation Institute","ror":"https://ror.org/001kv2y39","country_code":"AE","type":"facility","lineage":["https://openalex.org/I4210087059"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Ridhi Jain","raw_affiliation_strings":["Technology Innovation Institute (TII),Abu Dhabi,United Arab Emirates"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technology Innovation Institute (TII),Abu Dhabi,United Arab Emirates","institution_ids":["https://openalex.org/I4210087059"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052473439","display_name":"Tam\u00e1s Bisztray","orcid":"https://orcid.org/0000-0003-2626-3434"},"institutions":[{"id":"https://openalex.org/I184942183","display_name":"University of Oslo","ror":"https://ror.org/01xtthb56","country_code":"NO","type":"education","lineage":["https://openalex.org/I184942183"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Tamas Bisztray","raw_affiliation_strings":["University of Oslo,Oslo,Norway"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oslo,Oslo,Norway","institution_ids":["https://openalex.org/I184942183"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056145687","display_name":"M\u00e9rouane Debbah","orcid":"https://orcid.org/0000-0001-8941-8080"},"institutions":[{"id":"https://openalex.org/I176601375","display_name":"Khalifa University of Science and Technology","ror":"https://ror.org/05hffr360","country_code":"AE","type":"education","lineage":["https://openalex.org/I176601375"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Merouane Debbah","raw_affiliation_strings":["Khalifa University,Abu Dhabi,United Arab Emirates"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Khalifa University,Abu Dhabi,United Arab Emirates","institution_ids":["https://openalex.org/I176601375"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":14.2961,"has_fulltext":false,"cited_by_count":46,"citation_normalized_percentile":{"value":0.99273296,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"296","last_page":"302"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9004999995231628,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9004999995231628,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8454470038414001},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7198756337165833},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4736063480377197},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.3976982831954956},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3767440617084503},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.07909265160560608},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.0544988214969635}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8454470038414001},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7198756337165833},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4736063480377197},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3976982831954956},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3767440617084503},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.07909265160560608},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0544988214969635}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/csr61664.2024.10679494","is_oa":false,"landing_page_url":"https://doi.org/10.1109/csr61664.2024.10679494","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Cyber Security and Resilience (CSR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1998963078","https://openalex.org/W2334461743","https://openalex.org/W2766447205","https://openalex.org/W2806081754","https://openalex.org/W2890894339","https://openalex.org/W2911296969","https://openalex.org/W2963957489","https://openalex.org/W2996848635","https://openalex.org/W3027879771","https://openalex.org/W3134642945","https://openalex.org/W3177813494","https://openalex.org/W4293246685","https://openalex.org/W4361866125","https://openalex.org/W4378473878","https://openalex.org/W4382322607","https://openalex.org/W4384071683","https://openalex.org/W4384390176","https://openalex.org/W4385570331","https://openalex.org/W4386555653","https://openalex.org/W4396786650","https://openalex.org/W4399568491","https://openalex.org/W4401042286","https://openalex.org/W4402665833","https://openalex.org/W6746490324","https://openalex.org/W6752148829","https://openalex.org/W6777615688","https://openalex.org/W6791376898","https://openalex.org/W6798182279","https://openalex.org/W6850820320","https://openalex.org/W6852775702","https://openalex.org/W6854020643","https://openalex.org/W6855071685","https://openalex.org/W6856074014","https://openalex.org/W6856537298","https://openalex.org/W6860005124","https://openalex.org/W6865891915"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W972276598","https://openalex.org/W2087343574"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"are":[4,71],"increasingly":[5],"used":[6],"across":[7],"various":[8],"domains,":[9],"from":[10,260],"software":[11],"development":[12],"to":[13,50,110,139,146,152,169,181],"cyber":[14],"threat":[15],"intelligence.":[16],"Understanding":[17],"all":[18],"the":[19,52,107,135,164,197,217,221,253,261],"different":[20],"cybersecurity":[21,108,199],"fields,":[22],"including":[23,94],"topics":[24],"such":[25,240],"as":[26,192,241],"cryptography,":[27],"reverse":[28],"engineering,":[29],"and":[30,47,68,80,87,103,126,137,143,145,157,203,213,256],"risk":[31],"assessment,":[32],"poses":[33],"a":[34,44,185,193],"challenge":[35],"even":[36],"for":[37,195,252],"human":[38,179,234],"experts.":[39],"The":[40,118,188,205,246],"research":[41,97,254],"community":[42,255],"needs":[43],"diverse,":[45],"accurate,":[46],"up-to-date":[48],"dataset":[49,248],"test":[51],"general":[53,198],"knowledge":[54,200],"of":[55,123,173,201],"LLMs":[56,223],"in":[57,106,184],"cybersecurity.":[58,153],"To":[59],"address":[60],"this":[61],"gap,":[62],"we":[63,91,176],"present":[64],"CyberMetric-80,":[65,230],"CyberMetric-500,":[66],"CyberMetric-2000,":[67],"CyberMetric-10000,":[69],"which":[70],"multiple-choice":[72],"Q&A":[73],"benchmark":[74],"datasets":[75],"comprising":[76],"80,":[77],"500,":[78],"2000,":[79],"10,000":[81],"questions,":[82,112],"respectively.":[83],"By":[84],"utilizing":[85],"GPT-3.5":[86],"Retrieval-Augmented":[88],"Generation":[89],"(RAG),":[90],"collected":[92],"documents,":[93],"NIST":[95],"standards,":[96],"papers,":[98],"publicly":[99,250],"accessible":[100],"books,":[101],"RFCs,":[102],"other":[104],"publications":[105],"domain,":[109],"generate":[111],"each":[113],"with":[114],"four":[115],"possible":[116],"answers.":[117],"results":[119,189],"underwent":[120],"several":[121],"rounds":[122],"error":[124],"checking":[125],"refinement.":[127],"Human":[128],"experts":[129,235],"invested":[130],"over":[131],"200":[132],"hours":[133],"validating":[134],"questions":[136,150],"solutions":[138],"ensure":[140],"their":[141],"accuracy":[142],"relevance":[144],"filter":[147],"out":[148],"any":[149],"unrelated":[151],"We":[154],"have":[155],"evaluated":[156],"compared":[158],"25":[159],"state-of-the-art":[160],"LLM":[161],"models":[162,239],"on":[163,229],"CyberMetric":[165,247],"datasets.":[166],"In":[167],"addition":[168],"our":[170],"primary":[171],"goal":[172],"evaluating":[174],"LLMs,":[175],"involved":[177],"30":[178],"participants":[180],"solve":[182],"CyberMetric-80":[183],"closed-book":[186],"scenario.":[187],"can":[190,257],"serve":[191],"reference":[194],"comparing":[196],"humans":[202,228],"LLMs.":[204,219],"findings":[206],"revealed":[207],"that":[208],"GPT-4o,":[209],"GPT-4-turbo,":[210],"Mixtral-8x7B-Instruct,":[211],"Falcon-180B-Chat,":[212],"GEMINI-pro":[214],"1.0":[215],"were":[216,224],"best-performing":[218],"Additionally,":[220],"top":[222],"more":[225],"accurate":[226],"than":[227],"although":[231],"highly":[232],"experienced":[233],"still":[236],"outperformed":[237],"small":[238],"Llama-3-8B,":[242],"Phi-2":[243],"or":[244],"Gemma-7b.":[245],"is":[249],"available":[251],"be":[258],"downloaded":[259],"projects'":[262],"website:":[263],"https://github.com/CyberMetric.":[264]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":36},{"year":2024,"cited_by_count":3}],"updated_date":"2026-06-14T07:44:22.658603","created_date":"2025-10-10T00:00:00"}
