{"id":"https://openalex.org/W4411450064","doi":"https://doi.org/10.1145/3715784","title":"Detecting and Reducing the Factual Hallucinations of Large Language Models with Metamorphic Testing","display_name":"Detecting and Reducing the Factual Hallucinations of Large Language Models with Metamorphic Testing","publication_year":2025,"publication_date":"2025-06-19","ids":{"openalex":"https://openalex.org/W4411450064","doi":"https://doi.org/10.1145/3715784"},"language":"en","primary_location":{"id":"doi:10.1145/3715784","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3715784","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3715784","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004605359","display_name":"Weibin Wu","orcid":"https://orcid.org/0000-0002-7262-6219"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weibin Wu","raw_affiliation_strings":["Sun Yat-sen University, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Zhuhai, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101949866","display_name":"Yuan Cao","orcid":"https://orcid.org/0009-0005-9401-276X"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhang Cao","raw_affiliation_strings":["Sun Yat-sen University, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Zhuhai, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004794671","display_name":"Ning Yi","orcid":"https://orcid.org/0000-0002-4788-3985"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ning Yi","raw_affiliation_strings":["Sun Yat-sen University, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Zhuhai, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110116867","display_name":"R. Ou","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongyi Ou","raw_affiliation_strings":["Sun Yat-sen University, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Zhuhai, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000582109","display_name":"Zibin Zheng","orcid":"https://orcid.org/0000-0002-7878-4330"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zibin Zheng","raw_affiliation_strings":["Sun Yat-sen University, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Zhuhai, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5004605359"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":9.9395,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.97722857,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"2","issue":"FSE","first_page":"1432","last_page":"1453"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hallucinating","display_name":"Hallucinating","score":0.7383001446723938},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5900589823722839},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4812207520008087},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.38374844193458557},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3554067015647888},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3022496700286865}],"concepts":[{"id":"https://openalex.org/C2911011789","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Hallucinating","level":2,"score":0.7383001446723938},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5900589823722839},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4812207520008087},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.38374844193458557},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3554067015647888},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3022496700286865},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3715784","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3715784","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3715784","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3715784","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.44999998807907104,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2162036145","https://openalex.org/W2782311202","https://openalex.org/W2998870469","https://openalex.org/W3196268181","https://openalex.org/W4210497109","https://openalex.org/W4221143046","https://openalex.org/W4226278401","https://openalex.org/W4241078940","https://openalex.org/W4281557260","https://openalex.org/W4313563818","https://openalex.org/W4376548770","https://openalex.org/W4384345715","https://openalex.org/W4385216086","https://openalex.org/W4385572377","https://openalex.org/W4385767379","https://openalex.org/W4387323850","https://openalex.org/W4400582422","https://openalex.org/W4402727921","https://openalex.org/W4404534210","https://openalex.org/W4404781779","https://openalex.org/W4409362569","https://openalex.org/W6838865847"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W3204676188","https://openalex.org/W2992803471","https://openalex.org/W2066036438","https://openalex.org/W2078454190","https://openalex.org/W2108123836","https://openalex.org/W2963350370","https://openalex.org/W2930227984","https://openalex.org/W4211075255"],"abstract_inverted_index":{"Question":[0],"answering":[1],"(QA)":[2],"is":[3,118],"a":[4,57,125],"fundamental":[5],"task":[6,103],"of":[7,46,66,112,116,128,138,161,183,195,204],"large":[8],"language":[9,130],"models":[10],"(LLMs),":[11],"which":[12,41,108],"requires":[13],"LLMs":[14,24,47,68,82,99,196],"to":[15,27,88,97],"automatically":[16],"answer":[17],"human-posed":[18],"questions":[19],"in":[20,48],"natural":[21,129],"language.":[22],"However,":[23],"are":[25,78,85],"known":[26],"distort":[28],"facts":[29],"and":[30,61,142,197],"make":[31,98],"non-factual":[32],"statements":[33],"(i.e.,":[34],"hallucinations)":[35],"when":[36,81],"dealing":[37],"with":[38,69,104,177],"QA":[39],"tasks,":[40],"may":[42],"affect":[43],"the":[44,63,101,110,153,174,193,202],"deployment":[45],"real-life":[49],"situations.":[50],"In":[51],"this":[52],"work,":[53],"we":[54,93],"propose":[55],"DrHall,":[56],"framework":[58],"for":[59,164,201],"detecting":[60],"reducing":[62],"factual":[64],"hallucinations":[65],"black-box":[67],"metamorphic":[70,95],"testing":[71,96],"(MT).":[72],"We":[73,186],"believe":[74],"that":[75,148,188],"hallucinated":[76],"answers":[77,91],"unstable.":[79],"Therefore,":[80],"hallucinate,":[83],"they":[84],"more":[86],"likely":[87],"produce":[89],"different":[90,105],"if":[92],"use":[94],"re-execute":[100],"same":[102],"execution":[106],"paths,":[107],"motivates":[109],"design":[111],"DrHall.":[113],"The":[114,144],"effectiveness":[115],"DrHall":[117,149,170],"evaluated":[119],"empirically":[120],"on":[121],"three":[122],"datasets,":[123],"including":[124],"self-built":[126],"dataset":[127],"questions:":[131,140],"FactHalluQA,":[132],"as":[133,135],"well":[134],"two":[136],"datasets":[137],"programming":[139],"Refactory":[141],"LeetCode.":[143],"evaluation":[145],"results":[146],"confirm":[147],"can":[150,171,191],"consistently":[151],"outperform":[152,173],"state-of-the-art":[154,175],"baselines,":[155,176],"obtaining":[156],"an":[157,178],"average":[158,179],"F1":[159],"score":[160],"over":[162,184],"0.856":[163],"hallucination":[165,168,180,206],"detection.":[166],"For":[167],"correction,":[169],"also":[172],"correction":[181],"rate":[182],"53%.":[185],"hope":[187],"our":[189],"work":[190],"enhance":[192],"reliability":[194],"provide":[198],"new":[199],"insights":[200],"research":[203],"LLM":[205],"mitigation.":[207]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
