{"id":"https://openalex.org/W7158365662","doi":"https://doi.org/10.48550/arxiv.2604.25256","title":"AutoResearchBench: Benchmarking AI Agents on Complex Scientific Literature Discovery","display_name":"AutoResearchBench: Benchmarking AI Agents on Complex Scientific Literature Discovery","publication_year":2026,"publication_date":"2026-04-28","ids":{"openalex":"https://openalex.org/W7158365662","doi":"https://doi.org/10.48550/arxiv.2604.25256"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.25256","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25256","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.25256","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000024135","display_name":"Lei Xiong","orcid":"https://orcid.org/0000-0003-4551-4411"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xiong, Lei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134869226","display_name":"Kun Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Kun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134835204","display_name":"Ziyi Xia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Ziyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134852153","display_name":"Wenbo Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Wenbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054455207","display_name":"Jin-Ge Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Jin-Ge","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134822784","display_name":"Zheng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134825645","display_name":"Jingying Shao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shao, Jingying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134869984","display_name":"Jianlyu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jianlyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134859874","display_name":"Hongjin Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Hongjin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134827013","display_name":"Xi Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134871189","display_name":"Qian Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Qian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134862408","display_name":"Hao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107163655","display_name":"\u8d8a \u6c88","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yue, Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134836763","display_name":"Xiaan Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Xiaan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134856745","display_name":"Yuyang Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yuyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134855825","display_name":"Yesheng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yesheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134864987","display_name":"Haiyu Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Haiyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134858912","display_name":"Zhicheng Dou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dou, Zhicheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":["https://openalex.org/A5000024135"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.32580000162124634,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.32580000162124634,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.21930000185966492,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.11869999766349792,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.8152999877929688},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6134999990463257},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6026999950408936},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5788000226020813},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5687999725341797},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5378999710083008},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4887999892234802},{"id":"https://openalex.org/keywords/scientific-literature","display_name":"Scientific literature","score":0.41449999809265137}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.8152999877929688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7314000129699707},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6134999990463257},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6026999950408936},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5812000036239624},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5788000226020813},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5687999725341797},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5378999710083008},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4887999892234802},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.421999990940094},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.41449999809265137},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.39010000228881836},{"id":"https://openalex.org/C2984917352","wikidata":"https://www.wikidata.org/wiki/Q12772819","display_name":"Scientific discovery","level":2,"score":0.3862999975681305},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3862000107765198},{"id":"https://openalex.org/C2992562121","wikidata":"https://www.wikidata.org/wiki/Q3817808","display_name":"Scientific reasoning","level":2,"score":0.3594000041484833},{"id":"https://openalex.org/C23213167","wikidata":"https://www.wikidata.org/wiki/Q2351730","display_name":"Scientific progress","level":2,"score":0.33340001106262207},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.3319999873638153},{"id":"https://openalex.org/C195732255","wikidata":"https://www.wikidata.org/wiki/Q981008","display_name":"Sociology of scientific knowledge","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.2721000015735626},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2583000063896179}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.25256","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25256","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.25256","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25256","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Autonomous":[0],"scientific":[1,23,61,125],"research":[2,32,160,213],"is":[3,19,112,118],"significantly":[4],"advanced":[5],"thanks":[6],"to":[7,26,35,104,210],"the":[8,21,166,205,220],"development":[9],"of":[10,66,98,124,131,140],"AI":[11,46],"agents.":[12],"One":[13],"key":[14],"step":[15],"in":[16,49,214],"this":[17,51,215],"process":[18],"finding":[20],"right":[22],"literature,":[24],"whether":[25],"explore":[27],"existing":[28],"knowledge":[29],"for":[30,38,59,121,157],"a":[31,56,78,83,96],"problem,":[33],"or":[34],"acquire":[36],"evidence":[37],"verifying":[39],"assumptions":[40],"and":[41,88,134,143,148,162,188,207,224],"supporting":[42],"claims.":[43],"To":[44],"assess":[45],"agents'":[47],"capability":[48],"driving":[50],"process,":[52,87],"we":[53],"present":[54],"AutoResearchBench,":[55],"dedicated":[57],"benchmark":[58],"autonomous":[60,159],"literature":[62],"discovery.":[63],"AutoResearchBench":[64,111,154],"consists":[65],"two":[67],"complementary":[68],"task":[69],"types:":[70],"(1)":[71],"Deep":[72,186],"Research,":[73,91,193],"which":[74,92],"requires":[75,93],"tracking":[76],"down":[77],"specific":[79],"target":[80],"paper":[81],"through":[82],"progressive,":[84],"multi-step":[85],"probing":[86],"(2)":[89],"Wide":[90,192],"comprehensively":[94],"collecting":[95],"set":[97],"papers":[99,142],"satisfying":[100],"given":[101],"conditions.":[102],"Compared":[103],"previous":[105],"benchmarks":[106,177],"on":[107,185,191],"agentic":[108,175],"web":[109],"browsing,":[110],"distinguished":[113],"along":[114],"three":[115],"dimensions:":[116],"it":[117],"research-oriented,":[119],"calling":[120],"in-depth":[122],"comprehension":[123],"concepts;":[126],"literature-focused,":[127],"demanding":[128],"fine-grained":[129],"utilization":[130],"detailed":[132],"information;":[133],"open-ended,":[135],"involving":[136],"an":[137],"unknown":[138],"number":[139],"qualified":[141],"thus":[144],"requiring":[145],"deliberate":[146],"reasoning":[147],"search":[149],"throughout.":[150],"These":[151],"properties":[152],"make":[153],"uniquely":[155],"suited":[156],"evaluating":[158],"capabilities,":[161],"extraordinarily":[163],"challenging.":[164],"Even":[165],"most":[167],"powerful":[168],"LLMs,":[169],"despite":[170],"having":[171],"largely":[172],"conquered":[173],"general":[174],"web-browsing":[176],"such":[178],"as":[179],"BrowseComp,":[180],"achieve":[181],"only":[182],"9.39%":[183],"accuracy":[184],"Research":[187],"9.31%":[189],"IoU":[190],"while":[194],"many":[195],"other":[196],"strong":[197],"baselines":[198],"fall":[199],"below":[200],"5%.":[201],"We":[202,217],"publicly":[203,218],"release":[204,219],"dataset":[206],"evaluation":[208,222],"pipeline":[209],"facilitate":[211],"future":[212],"direction.":[216],"dataset,":[221],"pipeline,":[223],"code":[225],"at":[226],"https://github.com/CherYou/AutoResearchBench.":[227]},"counts_by_year":[],"updated_date":"2026-04-30T06:11:10.768123","created_date":"2026-04-30T00:00:00"}
