{"id":"https://openalex.org/W4403223051","doi":"https://doi.org/10.1145/3689776","title":"Drowzee: Metamorphic Testing for Fact-Conflicting Hallucination Detection in Large Language Models","display_name":"Drowzee: Metamorphic Testing for Fact-Conflicting Hallucination Detection in Large Language Models","publication_year":2024,"publication_date":"2024-10-08","ids":{"openalex":"https://openalex.org/W4403223051","doi":"https://doi.org/10.1145/3689776"},"language":"en","primary_location":{"id":"doi:10.1145/3689776","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3689776","pdf_url":null,"source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1145/3689776","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047202892","display_name":"Ningke Li","orcid":"https://orcid.org/0000-0003-0270-1372"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ningke Li","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0003-0270-1372","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068561556","display_name":"Yuekang Li","orcid":"https://orcid.org/0000-0003-4382-0757"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yuekang Li","raw_affiliation_strings":["The University of New South Wales, Sydney, Australia"],"raw_orcid":"https://orcid.org/0000-0003-4382-0757","affiliations":[{"raw_affiliation_string":"The University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100330541","display_name":"Yi Liu","orcid":"https://orcid.org/0000-0002-4978-127X"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yi Liu","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-4978-127X","affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085767410","display_name":"Ling Shi","orcid":"https://orcid.org/0000-0002-2023-0247"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ling Shi","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-2023-0247","affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000432413","display_name":"Kailong Wang","orcid":"https://orcid.org/0000-0002-3977-6573"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kailong Wang","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-3977-6573","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5115695530","display_name":"Haoyu Wang","orcid":"https://orcid.org/0000-0003-1100-8633"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyu Wang","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0003-1100-8633","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5047202892"],"corresponding_institution_ids":["https://openalex.org/I47720641"],"apc_list":null,"apc_paid":null,"fwci":4.7203,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.95540106,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"8","issue":"OOPSLA2","first_page":"1843","last_page":"1872"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metamorphic-rock","display_name":"Metamorphic rock","score":0.5673708915710449},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.44720518589019775},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.41582080721855164},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3867688775062561},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3690192699432373},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3210337162017822},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.2618948221206665},{"id":"https://openalex.org/keywords/geochemistry","display_name":"Geochemistry","score":0.1514817476272583},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.12406575679779053}],"concepts":[{"id":"https://openalex.org/C26687426","wikidata":"https://www.wikidata.org/wiki/Q47069","display_name":"Metamorphic rock","level":2,"score":0.5673708915710449},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.44720518589019775},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.41582080721855164},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3867688775062561},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3690192699432373},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3210337162017822},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.2618948221206665},{"id":"https://openalex.org/C17409809","wikidata":"https://www.wikidata.org/wiki/Q161764","display_name":"Geochemistry","level":1,"score":0.1514817476272583},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.12406575679779053}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3689776","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3689776","pdf_url":null,"source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3689776","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3689776","pdf_url":null,"source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5099999904632568,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W102708294","https://openalex.org/W2022166150","https://openalex.org/W2081580037","https://openalex.org/W2138522630","https://openalex.org/W2904134584","https://openalex.org/W4253706206","https://openalex.org/W4281609631","https://openalex.org/W4294753225","https://openalex.org/W4311550910","https://openalex.org/W4383987670","https://openalex.org/W4384918448","https://openalex.org/W4384920109","https://openalex.org/W4386081573","https://openalex.org/W4386184855","https://openalex.org/W4386501849","https://openalex.org/W4387355345","https://openalex.org/W4387561528","https://openalex.org/W4387947626","https://openalex.org/W4388328860","https://openalex.org/W4388585881","https://openalex.org/W4389116118","https://openalex.org/W4390723197","https://openalex.org/W4392353733","https://openalex.org/W4396821716","https://openalex.org/W4402442681","https://openalex.org/W4402667157","https://openalex.org/W6638960211","https://openalex.org/W6891837975"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2388869842","https://openalex.org/W2354163170","https://openalex.org/W2012269922","https://openalex.org/W2368683800","https://openalex.org/W4255743663","https://openalex.org/W2401987651","https://openalex.org/W1006855143","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Large":[0],"language":[1,6],"models":[2],"(LLMs)":[3],"have":[4],"revolutionized":[5],"processing,":[7],"but":[8,20],"face":[9],"critical":[10],"challenges":[11],"with":[12,148,207,220],"security,":[13],"privacy,":[14],"and":[15,51,110,137,188,198,225,244,284],"generating":[16],"hallucinations":[17,200],"\u2014":[18],"coherent":[19],"factually":[21],"inaccurate":[22],"outputs.":[23],"A":[24],"major":[25],"issue":[26],"is":[27,41,55,83],"fact-conflicting":[28],"hallucination":[29,208],"(FCH),":[30],"where":[31],"LLMs":[32,154,203,218],"produce":[33],"content":[34],"contradicting":[35],"ground":[36,149,189],"truth":[37,150],"facts.":[38],"Addressing":[39],"FCH":[40,74,104],"difficult":[42],"due":[43],"to":[44,163,213,267,282],"two":[45,173],"key":[46],"challenges:":[47],"1)":[48],"Automatically":[49],"constructing":[50],"updating":[52],"benchmark":[53],"datasets":[54],"hard,":[56],"as":[57],"existing":[58],"methods":[59],"rely":[60],"on":[61,155,261],"manually":[62],"curated":[63],"static":[64],"benchmarks":[65],"that":[66,113,176,234],"cannot":[67],"cover":[68],"the":[69,78,178,181,185,250,276],"broad,":[70],"evolving":[71],"spectrum":[72],"of":[73,145,184,227],"cases.":[75],"2)":[76],"Validating":[77],"reasoning":[79,133,229],"behind":[80],"LLM":[81,186],"outputs":[82],"inherently":[84],"difficult,":[85],"especially":[86],"for":[87,103,278],"complex":[88],"logical":[89,132,228],"relations.":[90],"To":[91,167,247],"tackle":[92],"these":[93,156],"challenges,":[94],"we":[95,135,171,253],"introduce":[96],"a":[97,115,142,262],"novel":[98],"logic-programming-aided":[99],"metamorphic":[100],"testing":[101],"technique":[102],"detection.":[105],"We":[106,152],"develop":[107],"an":[108],"extensive":[109],"extensible":[111],"framework":[112],"constructs":[114],"comprehensive":[116],"factual":[117],"knowledge":[118,140,271],"base":[119],"by":[120,239],"crawling":[121],"sources":[122],"like":[123],"Wikipedia,":[124],"seamlessly":[125],"integrated":[126],"into":[127,141],"D":[128,240],"rowzee":[129,241],".":[130],"Using":[131],"rules,":[134],"transform":[136],"augment":[138],"this":[139],"large":[143],"set":[144],"test":[146,153,196,236],"cases":[147,157,197,237],"answers.":[151,166],"through":[158],"template-based":[159],"prompts,":[160],"requiring":[161],"them":[162],"provide":[164],"reasoned":[165],"validate":[168],"their":[169],"reasoning,":[170],"propose":[172],"semantic-aware":[174],"oracles":[175],"assess":[177],"similarity":[179],"between":[180],"semantic":[182],"structures":[183],"answers":[187],"truth.":[190],"Our":[191,273],"approach":[192],"automatically":[193],"generates":[194],"useful":[195],"identifies":[199],"across":[201],"six":[202],"within":[204],"nine":[205],"domains,":[206],"rates":[209],"ranging":[210],"from":[211],"24.7%":[212],"59.8%.":[214],"Key":[215],"findings":[216,274],"include":[217],"struggling":[219],"temporal":[221],"concepts,":[222],"out-of-distribution":[223],"knowledge,":[224],"lack":[226],"capabilities.":[230],"The":[231],"results":[232],"show":[233],"logic-based":[235],"generated":[238],"effectively":[242],"trigger":[243],"detect":[245,283],"hallucinations.":[246,287],"further":[248],"mitigate":[249,285],"identified":[251],"FCHs,":[252],"explored":[254],"model":[255,286],"editing":[256],"techniques,":[257],"which":[258],"proved":[259],"effective":[260],"small":[263],"scale":[264],"(with":[265],"edits":[266],"fewer":[268],"than":[269],"1000":[270],"pieces).":[272],"emphasize":[275],"need":[277],"continued":[279],"community":[280],"efforts":[281]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
