{"id":"https://openalex.org/W7131417132","doi":"https://doi.org/10.48550/arxiv.2602.19594","title":"ISO-Bench: Can Coding Agents Optimize Real-World Inference Workloads?","display_name":"ISO-Bench: Can Coding Agents Optimize Real-World Inference Workloads?","publication_year":2026,"publication_date":"2026-02-23","ids":{"openalex":"https://openalex.org/W7131417132","doi":"https://doi.org/10.48550/arxiv.2602.19594"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.19594","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.19594","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126676067","display_name":"Ayush Nangia","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nangia, Ayush","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126744520","display_name":"Shikhar Mishra","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mishra, Shikhar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055209653","display_name":"Aman Gokrani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gokrani, Aman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5123504887","display_name":"Paras Chopra","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chopra, Paras","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5126676067"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.12800000607967377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.12800000607967377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.10719999670982361,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.08760000020265579,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.8087999820709229},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7638000249862671},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.699999988079071},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.583299994468689},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.46889999508857727},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4465999901294708},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4293999969959259},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4104999899864197}],"concepts":[{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.8087999820709229},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.792900025844574},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7638000249862671},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.699999988079071},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.583299994468689},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5206000208854675},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.46889999508857727},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4553999900817871},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4465999901294708},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4293999969959259},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4104999899864197},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.37529999017715454},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3181000053882599},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.28839999437332153},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.26409998536109924},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.2639999985694885},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.26170000433921814},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2583000063896179}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.19594","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.19594","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,57,140],"introduce":[1],"ISO-Bench,":[2],"a":[3,39],"benchmark":[4],"for":[5,109],"coding":[6,118],"agents":[7,129,144],"to":[8,81,103,136],"test":[9],"their":[10],"capabilities":[11],"on":[12],"real-world":[13],"inference":[14],"optimization":[15,50],"tasks.":[16],"These":[17],"tasks":[18,60],"were":[19],"taken":[20],"from":[21,61],"vLLM":[22],"and":[23,41,99,116],"SGLang,":[24],"two":[25],"of":[26,89],"the":[27,45,86,90,157],"most":[28],"popular":[29],"LLM":[30],"serving":[31],"frameworks.":[32],"Each":[33],"task":[34],"provides":[35],"an":[36,49],"agent":[37,46,124],"with":[38,65,145],"codebase":[40],"bottleneck":[42],"description,":[43],"whereby":[44],"must":[47],"produce":[48],"patch":[51],"evaluated":[52],"against":[53],"expert":[54],"human":[55],"solutions.":[56,139],"curated":[58],"54":[59],"merged":[62],"pull":[63],"requests":[64],"measurable":[66],"performance":[67],"improvements.":[68],"While":[69,112],"existing":[70],"benchmarks":[71],"heavily":[72],"use":[73],"runtime-based":[74],"metrics,":[75],"such":[76],"approaches":[77],"can":[78],"be":[79],"gamed":[80],"pass":[82],"tests":[83],"without":[84],"capturing":[85],"actual":[87],"intent":[88],"code":[91],"changes.":[92],"Therefore,":[93],"we":[94,120],"combine":[95],"both":[96,106,114],"hard":[97],"(execution-based)":[98],"soft":[100],"(LLM-based)":[101],"metrics":[102],"show":[104,142],"that":[105,143],"are":[107],"necessary":[108],"complete":[110],"evaluation.":[111],"evaluating":[113],"closed":[115],"open-source":[117],"agents,":[119],"find":[121],"no":[122],"single":[123],"dominates":[125],"across":[126],"codebases.":[127],"Surprisingly,":[128],"often":[130],"identify":[131],"correct":[132],"bottlenecks":[133],"but":[134],"fail":[135],"execute":[137],"working":[138],"also":[141],"identical":[146],"underlying":[147],"models":[148],"differ":[149],"substantially,":[150],"suggesting":[151],"scaffolding":[152],"is":[153],"as":[154,156],"important":[155],"model.":[158]},"counts_by_year":[],"updated_date":"2026-02-26T06:34:08.959763","created_date":"2026-02-26T00:00:00"}
