{"id":"https://openalex.org/W7161760004","doi":"https://doi.org/10.48550/arxiv.2605.19743","title":"EngiAI: A Multi-Agent Framework and Benchmark Suite for LLM-Driven Engineering Design","display_name":"EngiAI: A Multi-Agent Framework and Benchmark Suite for LLM-Driven Engineering Design","publication_year":2026,"publication_date":"2026-05-19","ids":{"openalex":"https://openalex.org/W7161760004","doi":"https://doi.org/10.48550/arxiv.2605.19743"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.19743","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19743","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.19743","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136526215","display_name":"Gioele Molinari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Molinari, Gioele","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002261407","display_name":"Florian Felten","orcid":"https://orcid.org/0000-0002-2874-3645"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Felten, Florian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026038683","display_name":"Soheyl Massoudi","orcid":"https://orcid.org/0000-0002-6763-3625"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Massoudi, Soheyl","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136594533","display_name":"Mark Fuge","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fuge, Mark","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.3321000039577484,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.3321000039577484,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1378999948501587,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.03959999978542328,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.9027000069618225},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.8529000282287598},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.7170000076293945},{"id":"https://openalex.org/keywords/supervisor","display_name":"Supervisor","score":0.5504000186920166},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5447999835014343},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5121999979019165}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.9027000069618225},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.8529000282287598},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7384999990463257},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.7170000076293945},{"id":"https://openalex.org/C2779110517","wikidata":"https://www.wikidata.org/wiki/Q1240788","display_name":"Supervisor","level":2,"score":0.5504000186920166},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5447999835014343},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5121999979019165},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.4830000102519989},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4424000084400177},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4269999861717224},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3917999863624573},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.3034000098705292},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26750001311302185},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.257999986410141},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2572000026702881},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.25130000710487366},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.19743","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19743","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.19743","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19743","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.4692056179046631,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Model":[2],"(LLM)":[3],"agents":[4,115],"are":[5],"increasingly":[6],"applied":[7],"to":[8,72,168,208],"engineering":[9],"design":[10],"tasks,":[11],"yet":[12],"existing":[13],"evaluation":[14,36,190],"frameworks":[15],"do":[16],"not":[17],"adequately":[18],"address":[19],"multi-agent":[20],"systems":[21],"that":[22,107,211],"combine":[23],"simulation,":[24],"retrieval,":[25,124,187],"and":[26,57,75,128,136],"manufacturing":[27],"preparation.":[28],"We":[29],"introduce":[30],"a":[31,39,61,89,98,117],"benchmark":[32,41,65,82,94,110],"suite":[33],"with":[34,42,66,155,164],"three":[35],"dimensions:":[37],"(1)":[38],"workflow":[40],"seven":[43,113],"prompt":[44],"styles":[45],"targeting":[46],"distinct":[47],"cognitive":[48],"demands-including":[49],"direct":[50],"tool":[51],"use,":[52],"semantic":[53],"disambiguation,":[54],"conditional":[55,172],"branching,":[56],"working-memory":[58],"tasks;":[59],"(2)":[60],"Retrieval-Augmented":[62],"Generation":[63],"(RAG)":[64],"gated":[67],"scoring":[68],"isolating":[69],"retrieval":[70],"contributions":[71],"parameter":[73],"selection;":[74],"(3)":[76],"an":[77],"High":[78],"Performance":[79],"Computing":[80],"(HPC)":[81],"evaluating":[83],"end-to-end":[84],"ML":[85],"training":[86],"orchestration":[87],"on":[88,105,147,174],"SLURM":[90],"cluster.":[91],"Alongside":[92],"the":[93,109,171,189],"we":[95],"present":[96],"EngiAI,":[97],"Multi-Agent":[99],"System":[100],"(MAS)":[101],"reference":[102],"implementation":[103],"built":[104],"LangGraph":[106],"operationalizes":[108],"by":[111],"coordinating":[112],"specialized":[114],"through":[116],"supervisor":[118],"architecture,":[119],"unifying":[120],"topology":[121],"optimization,":[122],"document":[123],"HPC":[125,193],"job":[126],"orchestration,":[127,194],"3D":[129],"printer":[130],"control.":[131],"Across":[132],"four":[133],"LLM":[134],"backends":[135],"two":[137],"EngiBench":[138],"problems,":[139],"proprietary":[140],"models":[141,152],"achieve":[142],"96-97%":[143],"average":[144],"task":[145,165],"completion":[146,166],"Beams2D,":[148],"while":[149,205],"open-source":[150],"4B-parameter":[151],"reach":[153],"55-78%,":[154],"clear":[156],"generational":[157],"improvement.":[158],"Conditional":[159],"branching":[160],"proves":[161],"most":[162],"challenging,":[163],"dropping":[167],"20-53%":[169],"for":[170],"style":[173],"Photonics2D.":[175],"RAG":[176],"gating":[177],"confirms":[178],"near-perfect":[179],"retrieval-augmented":[180],"scores":[181],"(about":[182],"1.0)":[183],"versus":[184],"near-zero":[185],"without":[186],"validating":[188],"design.":[191],"On":[192],"one":[195],"model":[196],"completes":[197],"all":[198],"pipeline":[199],"steps":[200],"in":[201],"100%":[202],"of":[203],"runs":[204],"another":[206],"drops":[207],"50%,":[209],"revealing":[210],"multi-step":[212],"instruction":[213],"following":[214],"degrades":[215],"over":[216],"long-running":[217],"workflows.":[218]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-21T00:00:00"}
