{"id":"https://openalex.org/W7131383108","doi":"https://doi.org/10.48550/arxiv.2602.19127","title":"AgenticRAGTracer: A Hop-Aware Benchmark for Diagnosing Multi-Step Retrieval Reasoning in Agentic RAG","display_name":"AgenticRAGTracer: A Hop-Aware Benchmark for Diagnosing Multi-Step Retrieval Reasoning in Agentic RAG","publication_year":2026,"publication_date":"2026-02-22","ids":{"openalex":"https://openalex.org/W7131383108","doi":"https://doi.org/10.48550/arxiv.2602.19127"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.19127","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19127","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.19127","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120442906","display_name":"Qijie You","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"You, Qijie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124889500","display_name":"Wenkai Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Wenkai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126786040","display_name":"Wentao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Wentao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5120442906"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6772000193595886,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6772000193595886,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.17550000548362732,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.03440000116825104,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8183000087738037},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.5866000056266785},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.585099995136261},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.5526999831199646},{"id":"https://openalex.org/keywords/testbed","display_name":"Testbed","score":0.4153999984264374},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.41100001335144043},{"id":"https://openalex.org/keywords/mainstream","display_name":"Mainstream","score":0.36820000410079956}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8183000087738037},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7638999819755554},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.5866000056266785},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.585099995136261},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.5526999831199646},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42080000042915344},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.4153999984264374},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41100001335144043},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3873000144958496},{"id":"https://openalex.org/C2777617010","wikidata":"https://www.wikidata.org/wiki/Q18957","display_name":"Mainstream","level":2,"score":0.36820000410079956},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3668000102043152},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.33550000190734863},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.3167000114917755},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25380000472068787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.19127","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19127","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.19127","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19127","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.7294503450393677}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,53,63,113,155,174,210],"rapid":[2],"advancement":[3],"of":[4,84,177],"agent-based":[5],"methods":[6],"in":[7,26,219,229,237],"recent":[8],"years,":[9],"Agentic":[10,115,230],"RAG":[11,116,231],"has":[12,143],"undoubtedly":[13],"become":[14],"an":[15,76],"important":[16],"research":[17,228],"direction.":[18],"Multi-hop":[19],"reasoning,":[20],"which":[21,74,94],"requires":[22],"models":[23,126,159],"to":[24,62,129,205],"engage":[25],"deliberate":[27],"thinking":[28],"and":[29,49,79,98,104,127,142,232,242],"multi-step":[30],"interaction,":[31],"serves":[32],"as":[33],"a":[34,202,215],"critical":[35,203],"testbed":[36],"for":[37],"assessing":[38],"such":[39],"capabilities.":[40,86],"However,":[41],"existing":[42,147],"benchmarks":[43,90],"typically":[44],"provide":[45],"only":[46],"final":[47,64],"questions":[48,56,61],"answers,":[50],"while":[51,100],"lacking":[52],"intermediate":[54],"hop-level":[55],"that":[57,118,153,183],"gradually":[58],"connect":[59],"atomic":[60],"multi-hop":[65],"query.":[66],"This":[67,200],"limitation":[68],"prevents":[69],"researchers":[70],"from":[71],"analyzing":[72],"at":[73,246],"step":[75],"agent":[77],"fails":[78],"restricts":[80],"more":[81],"fine-grained":[82],"evaluation":[83],"model":[85],"Moreover,":[87],"most":[88],"current":[89],"are":[91,185,244],"manually":[92],"constructed,":[93],"is":[95,119],"both":[96],"time-consuming":[97],"labor-intensive,":[99],"also":[101],"limiting":[102],"scalability":[103],"generalization.":[105],"To":[106],"address":[107],"these":[108],"challenges,":[109],"we":[110],"introduce":[111],"AgenticRAGTracer,":[112],"first":[114],"benchmark":[117,134],"primarily":[120,186],"constructed":[121],"automatically":[122],"by":[123,188],"large":[124,157],"language":[125,158],"designed":[128],"support":[130],"step-by-step":[131],"validation.":[132],"Our":[133,240],"spans":[135],"multiple":[136],"domains,":[137],"contains":[138],"1,305":[139],"data":[140,243],"points,":[141],"no":[144],"overlap":[145],"with":[146,209],"mainstream":[148],"benchmarks.":[149],"Extensive":[150],"experiments":[151],"demonstrate":[152],"even":[154],"best":[156],"perform":[160],"poorly":[161],"on":[162,173],"our":[163,178,224],"dataset.":[164,179],"For":[165],"instance,":[166],"GPT-5":[167],"attains":[168],"merely":[169],"22.6\\%":[170],"EM":[171],"accuracy":[172],"hardest":[175],"portion":[176],"Hop-aware":[180],"diagnosis":[181],"reveals":[182],"failures":[184],"driven":[187],"distorted":[189],"reasoning":[190],"chains":[191],"--":[192],"either":[193],"collapsing":[194],"prematurely":[195],"or":[196],"wandering":[197],"into":[198],"over-extension.":[199],"highlights":[201],"inability":[204],"allocate":[206],"steps":[207],"consistent":[208],"task's":[211],"logical":[212],"structure,":[213],"providing":[214],"diagnostic":[216],"dimension":[217],"missing":[218],"traditional":[220],"evaluations.":[221],"We":[222],"believe":[223],"work":[225],"will":[226],"facilitate":[227],"inspire":[233],"further":[234],"meaningful":[235],"progress":[236],"this":[238],"area.":[239],"code":[241],"available":[245],"https://github.com/YqjMartin/AgenticRAGTracer.":[247]},"counts_by_year":[],"updated_date":"2026-02-26T06:34:08.959763","created_date":"2026-02-26T00:00:00"}
