{"id":"https://openalex.org/W7160904950","doi":"https://doi.org/10.48550/arxiv.2605.08366","title":"SWE Atlas: Benchmarking Coding Agents Beyond Issue Resolution","display_name":"SWE Atlas: Benchmarking Coding Agents Beyond Issue Resolution","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160904950","doi":"https://doi.org/10.48550/arxiv.2605.08366"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.08366","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08366","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.08366","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122348961","display_name":"Mohit Raghavendra","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raghavendra, Mohit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135982046","display_name":"Soham Dan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dan, Soham","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008600285","display_name":"Miguel Romero Calvo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Calvo, Miguel Romero","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135925562","display_name":"Yannis Yiming He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Yannis Yiming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116082871","display_name":"Johannes Mols","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mols, Johannes Baptist","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135949045","display_name":"Gautam Anand","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anand, Gautam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135949199","display_name":"Cole McCollum","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McCollum, Cole","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135954524","display_name":"Edgar Arakelyan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arakelyan, Edgar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050757455","display_name":"Vijay Bharadwaj","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bharadwaj, Vijay","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121358425","display_name":"Andrew Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Andrew","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135989303","display_name":"Jeff Da","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Da, Jeff","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135994319","display_name":"MohammadHossein Rezaei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rezaei, MohammadHossein","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135952954","display_name":"Bing Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Bing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135950496","display_name":"Brad Kenstler","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kenstler, Brad","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122332660","display_name":"Yunzhong He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Yunzhong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":15,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.8636999726295471,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.8636999726295471,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.023099999874830246,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.016699999570846558,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.8666999936103821},{"id":"https://openalex.org/keywords/code-refactoring","display_name":"Code refactoring","score":0.6869999766349792},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.5480999946594238},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5439000129699707},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5194000005722046},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.5002999901771545},{"id":"https://openalex.org/keywords/test-suite","display_name":"Test suite","score":0.49709999561309814},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.4440000057220459},{"id":"https://openalex.org/keywords/software-quality","display_name":"Software quality","score":0.4343000054359436}],"concepts":[{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.8666999936103821},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7488999962806702},{"id":"https://openalex.org/C152752567","wikidata":"https://www.wikidata.org/wiki/Q116877","display_name":"Code refactoring","level":3,"score":0.6869999766349792},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.5480999946594238},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5439000129699707},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5414000153541565},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5194000005722046},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.5002999901771545},{"id":"https://openalex.org/C151552104","wikidata":"https://www.wikidata.org/wiki/Q7705809","display_name":"Test suite","level":4,"score":0.49709999561309814},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.4440000057220459},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.4343000054359436},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.37779998779296875},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.35040000081062317},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.328000009059906},{"id":"https://openalex.org/C2776673561","wikidata":"https://www.wikidata.org/wiki/Q655357","display_name":"Atlas (anatomy)","level":2,"score":0.3190000057220459},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.30239999294281006},{"id":"https://openalex.org/C2778736646","wikidata":"https://www.wikidata.org/wiki/Q1143070","display_name":"Software suite","level":3,"score":0.29919999837875366},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.29159998893737793},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.29109999537467957},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2906999886035919},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.29030001163482666},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2791999876499176},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.27900001406669617},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C98183937","wikidata":"https://www.wikidata.org/wiki/Q2112188","display_name":"Program analysis","level":2,"score":0.27379998564720154},{"id":"https://openalex.org/C2780615140","wikidata":"https://www.wikidata.org/wiki/Q920419","display_name":"Upgrade","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26600000262260437},{"id":"https://openalex.org/C128942645","wikidata":"https://www.wikidata.org/wiki/Q1568346","display_name":"Test case","level":3,"score":0.265500009059906},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.26429998874664307},{"id":"https://openalex.org/C97686452","wikidata":"https://www.wikidata.org/wiki/Q7604153","display_name":"Static analysis","level":2,"score":0.2556999921798706},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.08366","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08366","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.08366","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08366","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.42924797534942627,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,92],"introduce":[1],"SWE":[2,28,33,102,159],"Atlas,":[3],"a":[4,94,162],"benchmark":[5],"suite":[6,165],"for":[7,166],"coding":[8,174],"agents":[9],"spanning":[10],"three":[11,36],"professional":[12],"software":[13,78,154],"engineering":[14,79,155,171],"workflows:":[15],"Codebase":[16],"Q&amp;A":[17],"(124":[18],"tasks),":[19,23],"Test":[20],"Writing":[21],"(90":[22],"and":[24,52,83,89,98,104,108,135,151,170],"Refactoring":[25],"(70":[26],"tasks).":[27],"Atlas":[29,103,160],"differs":[30],"from":[31],"prior":[32],"benchmarks":[34],"in":[35,173],"key":[37],"ways:":[38],"it":[39],"targets":[40],"underrepresented":[41],"but":[42],"practically":[43],"important":[44],"task":[45,56],"categories,":[46],"uses":[47],"comprehensive":[48],"category-specific":[49],"evaluation":[50,64,164],"protocols,":[51],"adopts":[53],"under-specified,":[54],"agentic":[55],"formulations":[57],"that":[58,106,127],"better":[59],"reflect":[60],"real-world":[61],"usage.":[62],"Its":[63],"framework":[65],"combines":[66],"programmatic":[67],"checks":[68],"with":[69,144],"rubric-based":[70],"assessment.":[71],"This":[72],"goes":[73],"beyond":[74],"functional":[75],"correctness,":[76],"evaluating":[77],"quality,":[80],"including":[81],"test":[82],"refactor":[84],"completeness,":[85],"maintainability,":[86],"reusable":[87],"abstractions,":[88],"codebase":[90,133],"hygiene.":[91],"evaluate":[93],"range":[95],"of":[96],"frontier":[97],"open-weight":[99,120],"models":[100,121,129,141],"on":[101,131],"find":[105],"GPT-5.4":[107],"Opus":[109],"4.7":[110],"achieve":[111],"the":[112,118],"strongest":[113],"overall":[114],"performance,":[115],"while":[116],"even":[117,139],"best":[119,156],"score":[122],"poorly.":[123],"Our":[124],"analysis":[125],"suggests":[126],"top":[128,140],"rely":[130],"extensive":[132],"exploration":[134],"runtime-driven":[136],"reasoning.":[137],"However,":[138],"consistently":[142],"struggle":[143],"subtle":[145],"edge":[146],"cases,":[147],"complex":[148],"runtime":[149],"analysis,":[150],"adherence":[152],"to":[153],"practices.":[157],"Overall,":[158],"provides":[161],"complementary":[163],"measuring":[167],"both":[168],"correctness":[169],"quality":[172],"agents.":[175]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
