{"id":"https://openalex.org/W4416034322","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.891","title":"VerifiAgent: a Unified Verification Agent in Language Model Reasoning","display_name":"VerifiAgent: a Unified Verification Agent in Language Model Reasoning","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416034322","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.891"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.891","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.891","pdf_url":"https://aclanthology.org/2025.findings-emnlp.891.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-emnlp.891.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058074512","display_name":"Jiuzhou Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiuzhou Han","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005792924","display_name":"Wray Buntine","orcid":"https://orcid.org/0000-0001-9292-1015"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wray Buntine","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5086032589","display_name":"Ehsan Shareghi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ehsan Shareghi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7588,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.89398708,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"16410","last_page":"16431"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2134999930858612,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2134999930858612,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.13359999656677246,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.09109999984502792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.46070000529289246},{"id":"https://openalex.org/keywords/non-monotonic-logic","display_name":"Non-monotonic logic","score":0.3767000138759613},{"id":"https://openalex.org/keywords/model-based-reasoning","display_name":"Model-based reasoning","score":0.32989999651908875},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.32670000195503235},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.32199999690055847},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.3124000132083893},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.287200003862381}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6819999814033508},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5034000277519226},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.46070000529289246},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45159998536109924},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.3767000138759613},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.32989999651908875},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.32670000195503235},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.32199999690055847},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.29600000381469727},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.287200003862381},{"id":"https://openalex.org/C185954173","wikidata":"https://www.wikidata.org/wiki/Q4347052","display_name":"Object language","level":3,"score":0.28459998965263367},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.27489998936653137},{"id":"https://openalex.org/C62360110","wikidata":"https://www.wikidata.org/wiki/Q96777007","display_name":"Circumscription","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-emnlp.891","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.891","pdf_url":"https://aclanthology.org/2025.findings-emnlp.891.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.891","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.891","pdf_url":"https://aclanthology.org/2025.findings-emnlp.891.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416034322.pdf","grobid_xml":"https://content.openalex.org/works/W4416034322.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,141],"demonstrate":[3],"remarkable":[4],"reasoning":[5,30,71,105,111,145],"capabilities":[6],"but":[7],"often":[8],"produce":[9],"unreliable":[10],"or":[11,19,76],"incorrect":[12],"responses.Existing":[13],"verification":[14,40,66,88,96,117],"methods":[15,97],"are":[16],"typically":[17],"model-specific":[18],"domain-restricted,":[20],"requiring":[21],"significant":[22],"computational":[23],"resources":[24],"and":[25,52,57,84,134],"lacking":[26],"scalability":[27],"across":[28,86],"diverse":[29],"tasks.To":[31],"address":[32],"these":[33],"limitations,":[34],"we":[35],"propose":[36],"VerifiAgent,":[37],"a":[38],"unified":[39],"agent":[41],"that":[42,92],"integrates":[43],"two":[44],"levels":[45],"of":[46],"verification:":[47],"meta-verification,":[48],"which":[49],"assesses":[50],"completeness":[51],"consistency":[53],"in":[54,142],"model":[55],"responses,":[56],"tool-based":[58],"adaptive":[59,79],"verification,":[60],"where":[61],"Ver-ifiAgent":[62],"autonomously":[63],"selects":[64],"appropriate":[65],"tools":[67],"based":[68],"on":[69],"the":[70,143],"type,":[72],"including":[73],"mathematical,":[74],"logical,":[75],"commonsense":[77],"reasoning.This":[78],"approach":[80],"ensures":[81],"both":[82],"efficiency":[83],"robustness":[85],"different":[87],"scenarios.Experimental":[89],"results":[90,129],"show":[91],"VerifiAgent":[93,153],"outperforms":[94],"baseline":[95],"(e.g.,":[98],"deductive":[99],"verifier,":[100],"backward":[101],"verifier)":[102],"among":[103],"all":[104],"tasks.Additionally,":[106],"it":[107],"can":[108,119],"further":[109],"enhance":[110],"accuracy":[112],"by":[113],"leveraging":[114],"feedback":[115],"from":[116],"results.VerifiAgent":[118],"also":[120],"be":[121],"effectively":[122],"applied":[123],"to":[124,137],"inference":[125],"scaling,":[126],"achieving":[127],"better":[128],"with":[130],"fewer":[131],"generated":[132],"samples":[133],"costs":[135],"compared":[136],"existing":[138],"process":[139],"reward":[140],"mathematical":[144],"domain.":[146],"1":[147],"C":[148],"Different":[149],"backbone":[150],"LLMs":[151],"for":[152]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-08T00:00:00"}
