{"id":"https://openalex.org/W4411449748","doi":"https://doi.org/10.1145/3715735","title":"Hallucination Detection in Large Language Models with Metamorphic Relations","display_name":"Hallucination Detection in Large Language Models with Metamorphic Relations","publication_year":2025,"publication_date":"2025-06-19","ids":{"openalex":"https://openalex.org/W4411449748","doi":"https://doi.org/10.1145/3715735"},"language":"en","primary_location":{"id":"doi:10.1145/3715735","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3715735","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Borui Yang","orcid":"https://orcid.org/0009-0009-3482-7667"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Borui Yang","raw_affiliation_strings":["King's College London, London, United Kingdom"],"raw_orcid":"https://orcid.org/0009-0009-3482-7667","affiliations":[{"raw_affiliation_string":"King's College London, London, United Kingdom","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071982681","display_name":"Md Afif Al Mamun","orcid":"https://orcid.org/0000-0002-9319-3483"},"institutions":[{"id":"https://openalex.org/I168635309","display_name":"University of Calgary","ror":"https://ror.org/03yjb2x39","country_code":"CA","type":"education","lineage":["https://openalex.org/I168635309"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Md Afif Al Mamun","raw_affiliation_strings":["University of Calgary, Calgary, Canada"],"raw_orcid":"https://orcid.org/0000-0002-9319-3483","affiliations":[{"raw_affiliation_string":"University of Calgary, Calgary, Canada","institution_ids":["https://openalex.org/I168635309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088708850","display_name":"Jie M. Zhang","orcid":"https://orcid.org/0000-0003-0481-7264"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jie M. Zhang","raw_affiliation_strings":["King's College London, London, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0003-0481-7264","affiliations":[{"raw_affiliation_string":"King's College London, London, United Kingdom","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067154597","display_name":"Gias Uddin","orcid":"https://orcid.org/0000-0003-1376-095X"},"institutions":[{"id":"https://openalex.org/I192455969","display_name":"York University","ror":"https://ror.org/05fq50484","country_code":"CA","type":"education","lineage":["https://openalex.org/I192455969"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Gias Uddin","raw_affiliation_strings":["York University, Toronto, Canada"],"raw_orcid":"https://orcid.org/0000-0003-1376-095X","affiliations":[{"raw_affiliation_string":"York University, Toronto, Canada","institution_ids":["https://openalex.org/I192455969"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":26.3816,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.99493258,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"2","issue":"FSE","first_page":"425","last_page":"445"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9492999911308289,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9350000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.6542838215827942},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5376812815666199},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5030476450920105},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.3627644181251526},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3530338406562805},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32160958647727966},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.21892741322517395}],"concepts":[{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.6542838215827942},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5376812815666199},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5030476450920105},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3627644181251526},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3530338406562805},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32160958647727966},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.21892741322517395}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3715735","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3715735","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","score":0.47999998927116394,"display_name":"No poverty"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W2769533150","https://openalex.org/W2782311202","https://openalex.org/W2914584834","https://openalex.org/W2952862139","https://openalex.org/W3007157104","https://openalex.org/W3040868708","https://openalex.org/W3094587024","https://openalex.org/W3109716606","https://openalex.org/W3154222818","https://openalex.org/W3196268181","https://openalex.org/W4221148722","https://openalex.org/W4281609631","https://openalex.org/W4285429195","https://openalex.org/W4287724053","https://openalex.org/W4288088397","https://openalex.org/W4288113062","https://openalex.org/W4300665782","https://openalex.org/W4307123345","https://openalex.org/W4309674289","https://openalex.org/W4309986458","https://openalex.org/W4311550910","https://openalex.org/W4327810158","https://openalex.org/W4327810286","https://openalex.org/W4361230777","https://openalex.org/W4377372342","https://openalex.org/W4377865889","https://openalex.org/W4377866048","https://openalex.org/W4378509270","https://openalex.org/W4378771713","https://openalex.org/W4381797997","https://openalex.org/W4382319416","https://openalex.org/W4383987670","https://openalex.org/W4385231746","https://openalex.org/W4385327559","https://openalex.org/W4385571034","https://openalex.org/W4386501849","https://openalex.org/W4387390444","https://openalex.org/W4387432227","https://openalex.org/W4391158659","https://openalex.org/W4391940880","https://openalex.org/W4392353733","https://openalex.org/W4396652682","https://openalex.org/W4398230331","https://openalex.org/W4399554833","https://openalex.org/W4400111732","https://openalex.org/W4401905935","https://openalex.org/W4402860127","https://openalex.org/W4404534210","https://openalex.org/W4406779421","https://openalex.org/W6810242208"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W3125011624","https://openalex.org/W1508631387","https://openalex.org/W2370917603","https://openalex.org/W2952760143","https://openalex.org/W2017776670","https://openalex.org/W2347897961","https://openalex.org/W2340870721","https://openalex.org/W2358318464"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"are":[4,57,65],"prone":[5],"to":[6,209],"hallucinations,":[7],"e.g.,":[8],"factually":[9],"incorrect":[10],"information,":[11],"in":[12,158],"their":[13],"responses.":[14],"These":[15],"hallucinations":[16],"present":[17],"challenges":[18],"for":[19,68],"LLM-based":[20],"applications":[21],"that":[22,83,114,154],"demand":[23],"high":[24,49],"factual":[25],"accuracy.":[26],"Existing":[27],"hallucination":[28,80,136],"detection":[29,81,137],"methods":[30,59],"primarily":[31],"depend":[32],"on":[33,61,111,144],"external":[34,97],"resources,":[35],"which":[36,64],"can":[37],"suffer":[38],"from":[39,180],"issues":[40],"such":[41],"as":[42],"low":[43,51],"availability,":[44],"incomplete":[45],"coverage,":[46],"privacy":[47],"concerns,":[48],"latency,":[50],"reliability,":[52],"and":[53,87,99,105,143,147,163,191],"poor":[54],"scalability.":[55],"There":[56],"also":[58,221],"depending":[60],"output":[62],"probabilities,":[63],"often":[66],"inaccessible":[67],"closed-source":[69,106,149],"LLMs":[70,169],"like":[71],"GPT":[72],"models.":[73],"This":[74],"paper":[75],"presents":[76],"MetaQA,":[77],"a":[78,120,176],"self-contained":[79],"approach":[82],"leverages":[84],"metamorphic":[85,124],"relation":[86],"prompt":[88],"mutation.":[89],"Unlike":[90],"existing":[91],"methods,":[92],"MetaQA":[93,108,131,155,172,201,220],"operates":[94],"without":[95],"any":[96],"resources":[98],"is":[100,109,119],"compatible":[101],"with":[102,132,175,199],"both":[103],"open-source":[104,146],"LLMs.":[107,150],"based":[110],"the":[112,122,133,167],"hypothesis":[113],"if":[115],"an":[116,203,215],"LLM\u2019s":[117],"response":[118],"hallucination,":[121],"designed":[123],"relations":[125],"will":[126],"be":[127],"violated.":[128],"We":[129],"compare":[130],"state-of-the-art":[134],"zero-resource":[135],"method,":[138],"SelfCheckGPT,":[139],"across":[140,224],"multiple":[141],"datasets,":[142],"two":[145,148],"Our":[151],"results":[152],"reveal":[153],"outperforms":[156,173],"SelfCheckGPT":[157,174],"terms":[159],"of":[160,206,212,218,228],"precision,":[161],"recall,":[162],"f1":[164],"score.":[165],"For":[166,197],"four":[168],"we":[170],"study,":[171],"superiority":[177,223],"margin":[178],"ranging":[179],"0.041":[181],"-":[182,187,193],"0.113":[183],"(for":[184,189,195],"precision),":[185],"0.143":[186],"0.430":[188],"recall),":[190],"0.154":[192],"0.368":[194],"F1-score).":[196],"instance,":[198],"Mistral-7B,":[200],"achieves":[202],"average":[204],"F1-score":[205,211],"0.435,":[207],"compared":[208],"SelfCheckGPT\u2019s":[210],"0.205,":[213],"representing":[214],"improvement":[216],"rate":[217],"112.2%.":[219],"demonstrates":[222],"all":[225],"different":[226],"categories":[227],"questions.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":9},{"year":2025,"cited_by_count":6}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
