{"id":"https://openalex.org/W7133537324","doi":"https://doi.org/10.48550/arxiv.2603.02239","title":"Engineering Reasoning and Instruction (ERI) Benchmark: A Large Taxonomy-driven Dataset for Foundation Models and Agents","display_name":"Engineering Reasoning and Instruction (ERI) Benchmark: A Large Taxonomy-driven Dataset for Foundation Models and Agents","publication_year":2026,"publication_date":"2026-02-16","ids":{"openalex":"https://openalex.org/W7133537324","doi":"https://doi.org/10.48550/arxiv.2603.02239"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.02239","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02239","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.02239","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128116952","display_name":"MZ Naser","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Naser, MZ","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109673133","display_name":"Ahmad Awwad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Awwad, Ahmad Bani","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128070888","display_name":"Zoie McCreery","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McCreery, Zoie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128054750","display_name":"Radwa Eissa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eissa, Radwa","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128050251","display_name":"Ahmad Naser","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naser, Ahmad","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024267376","display_name":"Gianluca Cusatis","orcid":"https://orcid.org/0000-0001-7436-3910"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cusatis, Gianluca","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128098235","display_name":"Andrew Metcalf","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Metcalf, Andrew","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002660274","display_name":"Kapil Chalil Madathil","orcid":"https://orcid.org/0000-0001-8938-9793"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Madathil, Kapil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128126192","display_name":"Jamal Abdalla","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abdalla, Jamal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013571827","display_name":"Venkatesh Kodur","orcid":"https://orcid.org/0000-0003-2058-2725"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kodur, Venkatesh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128043741","display_name":"Mohammad Reza Saeb","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saeb, Mohammad Reza","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5128116952"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.13289999961853027,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.13289999961853027,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.11620000004768372,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.10649999976158142,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6452000141143799},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.5536999702453613},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5134000182151794},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.5109000205993652},{"id":"https://openalex.org/keywords/engineering-education","display_name":"Engineering education","score":0.41359999775886536},{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.3449999988079071}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6452000141143799},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6388000249862671},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.5536999702453613},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5134000182151794},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.5109000205993652},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42309999465942383},{"id":"https://openalex.org/C5041995","wikidata":"https://www.wikidata.org/wiki/Q853745","display_name":"Engineering education","level":2,"score":0.41359999775886536},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3522000014781952},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3450999855995178},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.3449999988079071},{"id":"https://openalex.org/C133112747","wikidata":"https://www.wikidata.org/wiki/Q7251931","display_name":"Protocol analysis","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25540000200271606},{"id":"https://openalex.org/C2778565505","wikidata":"https://www.wikidata.org/wiki/Q2207566","display_name":"Spec#","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25360000133514404},{"id":"https://openalex.org/C182449105","wikidata":"https://www.wikidata.org/wiki/Q3099732","display_name":"Technical report","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.02239","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02239","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.02239","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02239","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"Engineering":[1],"Reasoning":[2],"and":[3,15,22,39,42,45,59,61,67,75,84,112,120,147,166,174,183],"Instruction":[4],"(ERI)":[5],"benchmark":[6],"is":[7,46,159],"a":[8,86,107,137],"taxonomy-driven":[9],"instruction":[10,178],"dataset":[11,25],"designed":[12],"to":[13,151,156,170],"train":[14],"evaluate":[16],"engineering-capable":[17],"large":[18],"language":[19],"models":[20,94,114],"(LLMs)":[21],"agents.":[23],"This":[24],"spans":[26],"nine":[27],"engineering":[28,188],"fields":[29],"(namely:":[30],"civil,":[31],"mechanical,":[32],"electrical,":[33],"chemical,":[34],"environmental,":[35],"aerospace,":[36],"materials,":[37],"fire,":[38],"industrial":[40],"engineering)":[41],"55":[43],"subdomains,":[44],"crossed":[47],"with":[48,72,92,161],"seven":[49,82],"intent":[50],"types":[51],"(i.e.,":[52],"definition,":[53],"explanation,":[54],"calculation,":[55],"comparison,":[56],"design/synthesis,":[57],"troubleshooting,":[58],"code-related)":[60],"three":[62],"difficulty":[63],"tiers":[64],"(undergraduate,":[65],"graduate,":[66],"professional),":[68],"yielding":[69],"57,750":[70],"records":[71],"field/subdomain/type/difficulty":[73],"metadata":[74],"solution":[76],"formatting.":[77],"We":[78],"examined":[79],"ERI":[80,158],"via":[81],"LLMs":[83],"report":[85],"statistically":[87],"significant":[88],"three-tier":[89],"performance":[90,122],"structure,":[91],"frontier":[93],"(GPT-5,":[95],"Claude":[96],"Sonnet":[97],"4,":[98],"DeepSeek":[99],"V3.1)":[100],"achieving":[101],"mean":[102],"scores":[103],"above":[104],"4.30":[105],"on":[106,124],"five-point":[108],"scale,":[109],"while":[110],"mid-tier":[111],"smaller":[113],"exhibited":[115],"progressively":[116],"higher":[117],"failure":[118],"rates":[119],"steeper":[121],"degradation":[123],"graduate-level":[125],"questions.":[126],"To":[127],"address":[128],"circularity":[129],"concerns":[130],"inherent":[131],"in":[132,187],"LLM":[133],"benchmarks,":[134],"we":[135],"developed":[136],"convergent":[138],"validation":[139,164],"protocol":[140],"that":[141],"leverages":[142],"cross-provider":[143],"independence,":[144],"multi-judge":[145],"averaging,":[146],"frontier-model":[148],"agreement":[149],"analysis":[150],"empirically":[152],"bound":[153],"hallucination":[154],"risk":[155],"1.7%.":[157],"released":[160],"taxonomy":[162],"specifications,":[163],"scripts,":[165],"an":[167],"evaluation":[168],"harness":[169],"enable":[171],"reproducible":[172],"comparisons":[173],"regression":[175],"testing":[176],"for":[177],"tuning,":[179],"routing,":[180],"retrieval-augmented":[181],"evaluation,":[182],"agentic":[184],"tool-use":[185],"workflows":[186],"settings.":[189]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-05T00:00:00"}
