{"id":"https://openalex.org/W7143345454","doi":"https://doi.org/10.48550/arxiv.2603.26499","title":"AIRA_2: Overcoming Bottlenecks in AI Research Agents","display_name":"AIRA_2: Overcoming Bottlenecks in AI Research Agents","publication_year":2026,"publication_date":"2026-03-27","ids":{"openalex":"https://openalex.org/W7143345454","doi":"https://doi.org/10.48550/arxiv.2603.26499"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.26499","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26499","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.26499","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061387148","display_name":"Karen Hambardzumyan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hambardzumyan, Karen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130958323","display_name":"Nicolas Baldwin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baldwin, Nicolas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129980960","display_name":"Edan Toledo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Toledo, Edan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070202776","display_name":"Rishi Hazra","orcid":"https://orcid.org/0000-0003-3422-2085"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hazra, Rishi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002457261","display_name":"Michael Kuchnik","orcid":"https://orcid.org/0000-0002-0805-1828"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuchnik, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092990928","display_name":"Bassel Al Omari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Omari, Bassel Al","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125420024","display_name":"Thomas Simon Foster","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Foster, Thomas Simon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125430799","display_name":"Anton Protopopov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Protopopov, Anton","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073953372","display_name":"Jean-Christophe Gagnon-Audet","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gagnon-Audet, Jean-Christophe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010085477","display_name":"Ishita Mediratta","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mediratta, Ishita","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130973028","display_name":"Kelvin Niu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Niu, Kelvin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000749837","display_name":"Michael Shvartsman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shvartsman, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130988709","display_name":"Alisia Lupidi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lupidi, Alisia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130986780","display_name":"Alexis Audran-Reiss","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Audran-Reiss, Alexis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023542938","display_name":"Parth H. Pathak","orcid":"https://orcid.org/0000-0002-0793-002X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pathak, Parth","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079740967","display_name":"Tatiana Shavrina","orcid":"https://orcid.org/0000-0002-6976-0185"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shavrina, Tatiana","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020439365","display_name":"Despoina Magka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Magka, Despoina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130959898","display_name":"Hela Momand","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Momand, Hela","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130979069","display_name":"Derek Dunfield","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dunfield, Derek","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004751764","display_name":"Nicola Cancedda","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cancedda, Nicola","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049427071","display_name":"Pontus Stenetorp","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stenetorp, Pontus","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130930653","display_name":"Carole-Jean Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Carole-Jean","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059094093","display_name":"Jakob Foerster","orcid":"https://orcid.org/0000-0001-9688-2498"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Foerster, Jakob Nicolaus","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062949033","display_name":"Yoram Bachrach","orcid":"https://orcid.org/0000-0002-4382-7636"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bachrach, Yoram","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5057031908","display_name":"Martin Josifoski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Josifoski, Martin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":25,"corresponding_author_ids":["https://openalex.org/A5061387148"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.17669999599456787,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.17669999599456787,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.0731000006198883,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.0674000009894371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/debugging","display_name":"Debugging","score":0.5428000092506409},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.5374000072479248},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.48339998722076416},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.47350001335144043},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.43639999628067017},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4300000071525574},{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.4269999861717224},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4156999886035919},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.4138000011444092},{"id":"https://openalex.org/keywords/queue","display_name":"Queue","score":0.3887999951839447}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7325000166893005},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.5428000092506409},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.5374000072479248},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.48339998722076416},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.47350001335144043},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.43639999628067017},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4300000071525574},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.4269999861717224},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4156999886035919},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4138000011444092},{"id":"https://openalex.org/C160403385","wikidata":"https://www.wikidata.org/wiki/Q220543","display_name":"Queue","level":2,"score":0.3887999951839447},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.38190001249313354},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37049999833106995},{"id":"https://openalex.org/C2780264999","wikidata":"https://www.wikidata.org/wiki/Q7445032","display_name":"Security domain","level":2,"score":0.36149999499320984},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.3456000089645386},{"id":"https://openalex.org/C2777489069","wikidata":"https://www.wikidata.org/wiki/Q1589822","display_name":"Ceiling (cloud)","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.32429999113082886},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.31769999861717224},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3125999867916107},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30809998512268066},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3052999973297119},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.29589998722076416},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.29159998893737793},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C174839445","wikidata":"https://www.wikidata.org/wiki/Q1134386","display_name":"Lock (firearm)","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C2775941552","wikidata":"https://www.wikidata.org/wiki/Q25212305","display_name":"Isolation (microbiology)","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C22684755","wikidata":"https://www.wikidata.org/wiki/Q847526","display_name":"Queueing theory","level":2,"score":0.257099986076355},{"id":"https://openalex.org/C148417208","wikidata":"https://www.wikidata.org/wiki/Q4825882","display_name":"Authentication (law)","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C153180980","wikidata":"https://www.wikidata.org/wiki/Q19776675","display_name":"Commit","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.26499","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26499","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.26499","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26499","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Existing":[0],"research":[1,10,137],"has":[2],"identified":[3],"three":[4,65],"structural":[5],"performance":[6,34,148],"bottlenecks":[7,63],"in":[8,164],"AI":[9],"agents:":[11],"(1)":[12],"synchronous":[13],"single-GPU":[14],"execution":[15],"constrains":[16],"sample":[17],"throughput,":[18],"limiting":[19],"the":[20,43,119,161],"benefit":[21],"of":[22,46,108,134],"search;":[23],"(2)":[24],"a":[25,52,78,85,104,150],"generalization":[26],"gap":[27],"where":[28],"validation-based":[29],"selection":[30],"causes":[31],"overfitting":[32],"and":[33,41,89,97,113,159],"to":[35],"degrade":[36],"over":[37],"extended":[38],"search":[39,55],"horizons;":[40],"(3)":[42],"limited":[44],"capability":[45],"fixed,":[47],"single-turn":[48],"LLM":[49,157],"operators":[50],"imposes":[51],"ceiling":[53],"on":[54,131],"performance.":[56],"We":[57],"introduce":[58],"AIRA$_2$,":[59],"which":[60,122],"addresses":[61],"these":[62],"through":[64],"architectural":[66,143],"choices:":[67],"an":[68],"asynchronous":[69],"multi-GPU":[70],"worker":[71],"pool":[72],"that":[73,83,92,141,147,154,160],"increases":[74],"experiment":[75],"throughput":[76],"linearly;":[77],"Hidden":[79],"Consistent":[80],"Evaluation":[81],"protocol":[82],"delivers":[84],"reliable":[86],"evaluation":[87,170],"signal;":[88],"ReAct":[90],"agents":[91],"dynamically":[93],"scope":[94],"their":[95],"actions":[96],"debug":[98],"interactively.":[99],"On":[100,125],"MLE-bench-30,":[101],"AIRA$^{\\dagger}_{2}$":[102],"achieves":[103,123],"mean":[105],"Percentile":[106],"Rank":[107],"81.5%":[109],"at":[110,115],"24":[111],"hours":[112],"83.1%":[114],"72":[116],"hours,":[117],"outperforming":[118],"strongest":[120],"baseline,":[121],"72.7%.":[124],"AIRS-Bench,":[126],"AIRA$_2$":[127],"exceeds":[128],"human":[129],"state-of-the-art":[130],"6":[132],"out":[133],"20":[135],"diverse":[136],"tasks.":[138],"Ablations":[139],"confirm":[140],"each":[142],"component":[144],"is":[145],"necessary,":[146],"follows":[149],"predictable":[151],"scaling":[152],"law":[153],"transfers":[155],"across":[156],"backbones,":[158],"\"overfitting\"":[162],"reported":[163],"prior":[165],"work":[166],"was":[167],"driven":[168],"by":[169],"noise":[171],"rather":[172],"than":[173],"true":[174],"data":[175],"memorization.":[176]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-31T00:00:00"}
