{"id":"https://openalex.org/W7154463892","doi":"https://doi.org/10.48550/arxiv.2604.12290","title":"Frontier-Eng: Benchmarking Self-Evolving Agents on Real-World Engineering Tasks with Generative Optimization","display_name":"Frontier-Eng: Benchmarking Self-Evolving Agents on Real-World Engineering Tasks with Generative Optimization","publication_year":2026,"publication_date":"2026-04-14","ids":{"openalex":"https://openalex.org/W7154463892","doi":"https://doi.org/10.48550/arxiv.2604.12290"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.12290","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12290","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.12290","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133674494","display_name":"Yizhe Chi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chi, Yizhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126139761","display_name":"Deyao Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Deyao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133664397","display_name":"Dapeng Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Dapeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021533156","display_name":"Tianwei Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Tianwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133645694","display_name":"Kaisen Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Kaisen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133715880","display_name":"Boshi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Boshi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133719821","display_name":"Zhe Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Zhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133661940","display_name":"Xiaoyan Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Xiaoyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133668208","display_name":"Bingxiang He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Bingxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133722439","display_name":"Han Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao, Han","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101252846","display_name":"Weiyang Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Weiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120403673","display_name":"Dianqiao Lei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei, Dianqiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133659256","display_name":"Qingle Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Qingle","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052881929","display_name":"Houde Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Houde","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133654814","display_name":"Bowen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Bowen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123207899","display_name":"Situ Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Situ","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133701726","display_name":"Youjie Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Youjie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133711592","display_name":"Yifan Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Yifan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133677639","display_name":"Calvin Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Calvin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133654424","display_name":"Eren Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Eren","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133632788","display_name":"Qinhuai Na","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Na, Qinhuai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":21,"corresponding_author_ids":["https://openalex.org/A5133674494"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3328999876976013,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3328999876976013,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.18940000236034393,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.0406000018119812,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.8065000176429749},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7439000010490417},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6862000226974487},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5005000233650208},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4269999861717224},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4185999929904938},{"id":"https://openalex.org/keywords/iterative-learning-control","display_name":"Iterative learning control","score":0.3752000033855438},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.334199994802475},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.33320000767707825}],"concepts":[{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.8065000176429749},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7439000010490417},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.732699990272522},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6862000226974487},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5005000233650208},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4551999866962433},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4269999861717224},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4185999929904938},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4090000092983246},{"id":"https://openalex.org/C117619785","wikidata":"https://www.wikidata.org/wiki/Q6094414","display_name":"Iterative learning control","level":3,"score":0.3752000033855438},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.334199994802475},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.33320000767707825},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.30570000410079956},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C509989072","wikidata":"https://www.wikidata.org/wiki/Q15188241","display_name":"Model-driven architecture","level":4,"score":0.29190000891685486},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.28870001435279846},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.28529998660087585},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C55660270","wikidata":"https://www.wikidata.org/wiki/Q5164377","display_name":"Constrained optimization","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.27869999408721924},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2605000138282776},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2597000002861023},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.25929999351501465},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.12290","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12290","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.12290","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12290","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Current":[0],"LLM":[1],"agent":[2,57],"benchmarks,":[3],"which":[4,55],"predominantly":[5],"focus":[6],"on":[7],"binary":[8],"pass/fail":[9],"tasks":[10,76,86],"such":[11],"as":[12],"code":[13],"generation":[14],"or":[15],"search-based":[16],"question":[17],"answering,":[18],"often":[19,28],"neglect":[20],"the":[21,31,123,127,178],"value":[22],"of":[23,34,180],"real-world":[24],"engineering":[25,80,194],"that":[26,94,118,154],"is":[27],"captured":[29],"through":[30],"iterative":[32,51],"optimization":[33,48],"feasible":[35],"designs.":[36],"To":[37],"this":[38],"end,":[39],"we":[40],"introduce":[41],"Frontier-Eng,":[42],"a":[43,69,137,168,173],"human-verified":[44],"benchmark":[45,128],"for":[46,131,164,176],"generative":[47],"--":[49,73],"an":[50,56],"propose-execute-evaluate":[52],"loop":[53],"in":[54,89,141],"generates":[58],"candidate":[59],"artifacts,":[60],"receives":[61],"executable":[62,188],"verifier":[63],"feedback,":[64],"and":[65,92,99,146,159],"revises":[66],"them":[67],"under":[68,104,167],"fixed":[70,169],"interaction":[71],"budget":[72],"spanning":[74],"$47$":[75],"across":[77],"five":[78],"broad":[79],"categories.":[81],"Unlike":[82],"previous":[83],"suites,":[84],"Frontier-Eng":[85,171],"are":[87],"grounded":[88],"industrial-grade":[90],"simulators":[91],"verifiers":[93],"provide":[95],"continuous":[96],"reward":[97],"signals":[98],"enforce":[100],"hard":[101],"feasibility":[102],"constraints":[103],"constrained":[105],"budgets.":[106],"We":[107,151],"evaluate":[108],"eight":[109],"frontier":[110],"language":[111],"models":[112],"using":[113],"representative":[114],"search":[115],"frameworks,":[116],"finding":[117],"while":[119],"GPT":[120],"5.4":[121],"achieves":[122],"most":[124],"robust":[125],"performance,":[126],"remains":[129,162],"challenging":[130],"all":[132],"models.":[133],"Our":[134],"analysis":[135],"suggests":[136],"dual":[138],"power-law":[139],"decay":[140],"improvement":[142],"frequency":[143],"($\\sim$":[144,148],"1/iteration)":[145],"magnitude":[147],"1/improvement":[149],"count).":[150],"further":[152],"show":[153],"although":[155],"width":[156],"improves":[157],"parallelism":[158],"diversity,":[160],"depth":[161],"crucial":[163],"hard-won":[165],"improvements":[166],"budget.":[170],"establishes":[172],"new":[174],"standard":[175],"assessing":[177],"capacity":[179],"AI":[181],"agents":[182],"to":[183,190],"integrate":[184],"domain":[185],"knowledge":[186],"with":[187],"feedback":[189],"solve":[191],"complex,":[192],"open-ended":[193],"problems.":[195]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-16T00:00:00"}
