{"id":"https://openalex.org/W7133893727","doi":"https://doi.org/10.48550/arxiv.2603.04384","title":"AgentIR: Reasoning-Aware Retrieval for Deep Research Agents","display_name":"AgentIR: Reasoning-Aware Retrieval for Deep Research Agents","publication_year":2026,"publication_date":"2026-03-04","ids":{"openalex":"https://openalex.org/W7133893727","doi":"https://doi.org/10.48550/arxiv.2603.04384"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.04384","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.04384","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.04384","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122929383","display_name":"Zijian Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Zijian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128132494","display_name":"Xueguang Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Xueguang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012958162","display_name":"Shengyao Zhuang","orcid":"https://orcid.org/0000-0002-6711-0955"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhuang, Shengyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128163103","display_name":"Jimmy Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Jimmy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128167329","display_name":"Akari Asai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Asai, Akari","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124661275","display_name":"Victor Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Victor","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5122929383"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2842999994754791,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2842999994754791,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1501999944448471,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.08699999749660492,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.7815999984741211},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6154999732971191},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4959999918937683},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.48890000581741333},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.4771000146865845},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4740999937057495},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.41990000009536743}],"concepts":[{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.7815999984741211},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7400000095367432},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6154999732971191},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5863999724388123},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4959999918937683},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.48890000581741333},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.4771000146865845},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4740999937057495},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.41990000009536743},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.41589999198913574},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4049000144004822},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3619000017642975},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33970001339912415},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3262999951839447},{"id":"https://openalex.org/C551230270","wikidata":"https://www.wikidata.org/wiki/Q4368942","display_name":"Data retrieval","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.30160000920295715},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2921999990940094},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.2858999967575073},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.2581999897956848}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.04384","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.04384","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.04384","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.04384","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6907661557197571}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deep":[0,27,82],"Research":[1,28,83],"agents":[2,29],"are":[3,96,142],"rapidly":[4],"emerging":[5],"as":[6],"primary":[7],"consumers":[8],"of":[9],"modern":[10],"retrieval":[11,61],"systems.":[12],"Unlike":[13],"human":[14],"users":[15],"who":[16],"issue":[17],"and":[18,42,73,99,135,140],"refine":[19],"queries":[20],"without":[21],"documenting":[22],"their":[23,100],"intermediate":[24],"thought":[25],"processes,":[26],"generate":[30],"explicit":[31],"natural":[32],"language":[33],"reasoning":[34,68],"before":[35],"each":[36],"search":[37],"call,":[38],"revealing":[39],"rich":[40],"intent":[41],"contextual":[43],"information":[44],"that":[45,63,80,93],"existing":[46],"retrievers":[47],"entirely":[48],"ignore.":[49],"To":[50],"exploit":[51],"this":[52],"overlooked":[53],"signal,":[54],"we":[55],"introduce:":[56],"(1)":[57],"Reasoning-Aware":[58],"Retrieval,":[59],"a":[60,76,103],"paradigm":[62],"jointly":[64],"embeds":[65],"the":[66,112,121],"agent's":[67],"trace":[69],"alongside":[70],"its":[71,133],"query;":[72],"(2)":[74],"DR-Synth,":[75],"data":[77,86,141],"synthesis":[78],"method":[79],"generates":[81],"retriever":[84],"training":[85],"from":[87],"standard":[88],"QA":[89],"datasets.":[90],"We":[91],"demonstrate":[92],"both":[94],"components":[95],"independently":[97],"effective,":[98],"combination":[101],"yields":[102],"trained":[104],"embedding":[105,130],"model,":[106],"AgentIR-4B,":[107],"with":[108,120,128,137],"substantial":[109],"gains.":[110],"On":[111],"challenging":[113],"BrowseComp-Plus":[114],"benchmark,":[115],"AgentIR-4B":[116],"achieves":[117],"68\\%":[118],"accuracy":[119],"open-weight":[122],"agent":[123],"Tongyi-DeepResearch,":[124],"compared":[125],"to":[126],"50\\%":[127],"conventional":[129],"models":[131],"twice":[132],"size,":[134],"37\\%":[136],"BM25.":[138],"Code":[139],"available":[143],"at:":[144],"https://texttron.github.io/AgentIR/.":[145]},"counts_by_year":[],"updated_date":"2026-03-07T13:37:22.277990","created_date":"2026-03-06T00:00:00"}
