{"id":"https://openalex.org/W7160416041","doi":"https://doi.org/10.48550/arxiv.2605.03546","title":"ProgramBench: Can Language Models Rebuild Programs From Scratch?","display_name":"ProgramBench: Can Language Models Rebuild Programs From Scratch?","publication_year":2026,"publication_date":"2026-05-05","ids":{"openalex":"https://openalex.org/W7160416041","doi":"https://doi.org/10.48550/arxiv.2605.03546"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.03546","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03546","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.03546","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135479297","display_name":"John Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, John","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085951731","display_name":"K. Lieret","orcid":"https://orcid.org/0000-0003-2792-7511"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lieret, Kilian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135501525","display_name":"Jeffrey Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Jeffrey","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135421635","display_name":"Parth Thakkar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thakkar, Parth","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120719884","display_name":"Dmitrii Pedchenko","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pedchenko, Dmitrii","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036439306","display_name":"Sten Sootla","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sootla, Sten","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135522853","display_name":"Emily McMilin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McMilin, Emily","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078519761","display_name":"Pengcheng Yin","orcid":"https://orcid.org/0000-0003-2739-1032"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Pengcheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135527265","display_name":"Rui Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hou, Rui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135489489","display_name":"Gabriel Synnaeve","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Synnaeve, Gabriel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135527647","display_name":"Diyi Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Diyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5035538068","display_name":"Ofir Press","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Press, Ofir","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.19020000100135803,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.19020000100135803,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.12919999659061432,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10639","display_name":"Advanced Software Engineering Methodologies","score":0.09480000287294388,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.7778000235557556},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.6208000183105469},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.5062999725341797},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.5026000142097473},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4318999946117401},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.41679999232292175},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.3695000112056732},{"id":"https://openalex.org/keywords/software-construction","display_name":"Software construction","score":0.3589000105857849},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.35249999165534973}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.843500018119812},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.7778000235557556},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.6496000289916992},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.6208000183105469},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5375999808311462},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.5062999725341797},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.5026000142097473},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4318999946117401},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.41679999232292175},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.3695000112056732},{"id":"https://openalex.org/C186846655","wikidata":"https://www.wikidata.org/wiki/Q3398377","display_name":"Software construction","level":4,"score":0.3589000105857849},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.35249999165534973},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3301999866962433},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.31869998574256897},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.310699999332428},{"id":"https://openalex.org/C76518257","wikidata":"https://www.wikidata.org/wiki/Q271680","display_name":"Software framework","level":5,"score":0.30059999227523804},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.29600000381469727},{"id":"https://openalex.org/C35869016","wikidata":"https://www.wikidata.org/wiki/Q846636","display_name":"Software architecture","level":3,"score":0.2953000068664551},{"id":"https://openalex.org/C2983609787","wikidata":"https://www.wikidata.org/wiki/Q10534782","display_name":"Software implementation","level":3,"score":0.2937999963760376},{"id":"https://openalex.org/C82214349","wikidata":"https://www.wikidata.org/wiki/Q657339","display_name":"Software metric","level":5,"score":0.29339998960494995},{"id":"https://openalex.org/C174683762","wikidata":"https://www.wikidata.org/wiki/Q609588","display_name":"Component-based software engineering","level":4,"score":0.29109999537467957},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C48002344","wikidata":"https://www.wikidata.org/wiki/Q2919644","display_name":"Verification and validation","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.27480000257492065},{"id":"https://openalex.org/C1009929","wikidata":"https://www.wikidata.org/wiki/Q179550","display_name":"Software bug","level":3,"score":0.266400009393692},{"id":"https://openalex.org/C2984968299","wikidata":"https://www.wikidata.org/wiki/Q1077784","display_name":"Software tool","level":3,"score":0.2653999924659729}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.03546","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03546","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.03546","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03546","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.4347892999649048}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Turning":[0],"ideas":[1],"into":[2],"full":[3],"software":[4,41,72,77,126],"projects":[5],"from":[6,119,168],"scratch":[7],"has":[8],"become":[9],"a":[10,54,59,83,93],"popular":[11],"use":[12],"case":[13],"for":[14],"language":[15],"models.":[16],"Agents":[17],"are":[18,104],"being":[19],"deployed":[20],"to":[21,38,67,75,123],"seed,":[22],"maintain,":[23],"and":[24,85,91,131,139],"grow":[25],"codebases":[26],"over":[27],"extended":[28],"periods":[29],"with":[30,147],"minimal":[31],"human":[32],"oversight.":[33],"Such":[34],"settings":[35],"require":[36],"models":[37],"make":[39],"high-level":[40],"architecture":[42],"decisions.":[43],"However,":[44],"existing":[45],"benchmarks":[46],"measure":[47,68],"focused,":[48],"limited":[49],"tasks":[50,117],"such":[51,127],"as":[52,128],"fixing":[53],"single":[55],"bug":[56],"or":[57],"developing":[58],"single,":[60],"specified":[61],"feature.":[62],"We":[63,135],"therefore":[64],"introduce":[65],"ProgramBench":[66],"the":[69,97,132,148],"ability":[70],"of":[71,153,158],"engineering":[73],"agents":[74,88],"develop":[76],"holisitically.":[78],"In":[79],"ProgramBench,":[80],"given":[81],"only":[82,156],"program":[84],"its":[86],"documentation,":[87],"must":[89],"architect":[90],"implement":[92],"codebase":[94],"that":[95,141,165],"matches":[96],"reference":[98],"executable's":[99],"behavior.":[100],"End-to-end":[101],"behavioral":[102],"tests":[103,154],"generated":[105],"via":[106],"agent-driven":[107],"fuzzing,":[108],"enabling":[109],"evaluation":[110],"without":[111],"prescribing":[112],"implementation":[113],"structure.":[114],"Our":[115],"200":[116],"range":[118],"compact":[120],"CLI":[121],"tools":[122],"widely":[124],"used":[125],"FFmpeg,":[129],"SQLite,":[130],"PHP":[133],"interpreter.":[134],"evaluate":[136],"9":[137],"LMs":[138],"find":[140],"none":[142],"fully":[143],"resolve":[144],"any":[145],"task,":[146],"best":[149],"model":[150],"passing":[151],"95\\%":[152],"on":[155],"3\\%":[157],"tasks.":[159],"Models":[160],"favor":[161],"monolithic,":[162],"single-file":[163],"implementations":[164],"diverge":[166],"sharply":[167],"human-written":[169],"code.":[170]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-07T00:00:00"}
