{"id":"https://openalex.org/W7133301435","doi":"https://doi.org/10.48550/arxiv.2603.01692","title":"Reasoning as Gradient: Scaling MLE Agents Beyond Tree Search","display_name":"Reasoning as Gradient: Scaling MLE Agents Beyond Tree Search","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133301435","doi":"https://doi.org/10.48550/arxiv.2603.01692"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.01692","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01692","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.01692","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127936964","display_name":"Yifei Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Yifei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128019762","display_name":"Xu Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127915655","display_name":"Xiao Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119837855","display_name":"Bowen Xian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xian, Bowen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128014579","display_name":"Qizheng Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Qizheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036818938","display_name":"Shikai Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Shikai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127972274","display_name":"Jingyuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jingyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127951507","display_name":"Jian Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128002279","display_name":"Mingrui Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Mingrui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128021160","display_name":"Weiqing Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Weiqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127948903","display_name":"Jiang Bian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bian, Jiang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5127936964"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.24879999458789825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.24879999458789825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.10909999907016754,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.09220000356435776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.5385000109672546},{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.4754999876022339},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.46950000524520874},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.43290001153945923},{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.38510000705718994},{"id":"https://openalex.org/keywords/preference-elicitation","display_name":"Preference elicitation","score":0.31119999289512634},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.31049999594688416}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6814000010490417},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.5385000109672546},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.4754999876022339},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.46950000524520874},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.43290001153945923},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4309999942779541},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.41609999537467957},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4074000120162964},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.38510000705718994},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.359499990940094},{"id":"https://openalex.org/C2777868144","wikidata":"https://www.wikidata.org/wiki/Q7239817","display_name":"Preference elicitation","level":3,"score":0.31119999289512634},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.31049999594688416},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.28450000286102295},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C163797641","wikidata":"https://www.wikidata.org/wiki/Q2067937","display_name":"Tree structure","level":3,"score":0.26170000433921814}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.01692","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01692","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.01692","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01692","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"LLM-based":[0],"agents":[1],"for":[2,126],"machine":[3],"learning":[4],"engineering":[5],"(MLE)":[6],"predominantly":[7],"rely":[8],"on":[9,96,103],"tree":[10,120],"search,":[11],"a":[12,79,91,99,104,114],"form":[13],"of":[14,151],"gradient-free":[15],"optimization":[16,137,157],"that":[17,56,82],"uses":[18],"scalar":[19],"validation":[20],"scores":[21],"to":[22,36,40,65,70,75],"rank":[23],"candidates.":[24],"As":[25],"LLM":[26],"reasoning":[27,64,128,133],"capabilities":[28],"improve,":[29],"exhaustive":[30,130],"enumeration":[31],"becomes":[32],"increasingly":[33,160],"inefficient":[34],"compared":[35],"directed":[37],"updates,":[38],"analogous":[39],"how":[41],"accurate":[42],"gradients":[43],"enable":[44],"efficient":[45],"descent":[46],"over":[47],"random":[48],"search.":[49],"We":[50,163],"introduce":[51],"\\textsc{Gome},":[52],"an":[53,159],"MLE":[54],"agent":[55],"operationalizes":[57],"gradient-based":[58,136,156],"optimization.":[59,77],"\\textsc{Gome}":[60,89],"maps":[61],"structured":[62],"diagnostic":[63],"gradient":[66],"computation,":[67],"success":[68],"memory":[69],"momentum,":[71],"and":[72,167],"multi-trace":[73],"execution":[74],"distributed":[76],"Under":[78],"closed-world":[80],"protocol":[81],"isolates":[83],"architectural":[84],"effects":[85],"from":[86],"external":[87],"knowledge,":[88],"achieves":[90],"state-of-the-art":[92],"35.1\\%":[93],"any-medal":[94],"rate":[95],"MLE-Bench":[97],"with":[98,117,140],"restricted":[100],"12-hour":[101],"budget":[102],"single":[105],"V100":[106],"GPU.":[107],"Scaling":[108],"experiments":[109],"across":[110],"10":[111],"models":[112],"reveal":[113],"critical":[115],"crossover:":[116],"weaker":[118],"models,":[119],"search":[121],"retains":[122],"advantages":[123],"by":[124],"compensating":[125],"unreliable":[127],"through":[129],"exploration;":[131],"as":[132,158],"capability":[134],"strengthens,":[135],"progressively":[138],"outperforms,":[139],"the":[141,148],"gap":[142],"widening":[143],"at":[144,170],"frontier-tier":[145],"models.":[146],"Given":[147],"rapid":[149],"advancement":[150],"reasoning-oriented":[152],"LLMs,":[153],"this":[154],"positions":[155],"favorable":[161],"paradigm.":[162],"release":[164],"our":[165],"codebase":[166],"GPT-5":[168],"traces":[169],"https://github.com/microsoft/RD-Agent.":[171]},"counts_by_year":[],"updated_date":"2026-03-12T06:13:28.667946","created_date":"2026-03-04T00:00:00"}
