{"id":"https://openalex.org/W7160825965","doi":"https://doi.org/10.48550/arxiv.2605.07122","title":"RepoZero: Can LLMs Generate a Code Repository from Scratch?","display_name":"RepoZero: Can LLMs Generate a Code Repository from Scratch?","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160825965","doi":"https://doi.org/10.48550/arxiv.2605.07122"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.07122","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07122","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.07122","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121313846","display_name":"Zhaoxi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhaoxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135868827","display_name":"Yiming Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yiming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135892798","display_name":"Weikang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Jiahui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135853382","display_name":"Jiahui Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Weikang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135853400","display_name":"Yunfang Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xiaoshuai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Qian, Liwei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Liwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Pei, Xin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pei, Xin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Huang, Jizhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Jizhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sun, Run","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Run","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Wu, Yunfang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yunfang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.18950000405311584,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.18950000405311584,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.17679999768733978,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.08959999680519104,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5879999995231628},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5554999709129333},{"id":"https://openalex.org/keywords/testbed","display_name":"Testbed","score":0.553600013256073},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4564000070095062},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.414900004863739},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4056999981403351},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.38199999928474426},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.3693999946117401},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.36649999022483826},{"id":"https://openalex.org/keywords/software-evolution","display_name":"Software evolution","score":0.3643999993801117}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7350000143051147},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5879999995231628},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5554999709129333},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.553600013256073},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.46160000562667847},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4564000070095062},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.414900004863739},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4056999981403351},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.38199999928474426},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.36649999022483826},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3662000000476837},{"id":"https://openalex.org/C202105479","wikidata":"https://www.wikidata.org/wiki/Q265013","display_name":"Software evolution","level":5,"score":0.3643999993801117},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.35580000281333923},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.35510000586509705},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.35420000553131104},{"id":"https://openalex.org/C148027188","wikidata":"https://www.wikidata.org/wiki/Q907375","display_name":"Unit testing","level":3,"score":0.3456000089645386},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.31630000472068787},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.3147999942302704},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2978000044822693},{"id":"https://openalex.org/C53942775","wikidata":"https://www.wikidata.org/wiki/Q1211721","display_name":"Code coverage","level":3,"score":0.28780001401901245},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.28519999980926514},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.27810001373291016},{"id":"https://openalex.org/C76518257","wikidata":"https://www.wikidata.org/wiki/Q271680","display_name":"Software framework","level":5,"score":0.2732999920845032},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.2597000002861023},{"id":"https://openalex.org/C2779639559","wikidata":"https://www.wikidata.org/wiki/Q7661178","display_name":"Symbolic execution","level":3,"score":0.2590999901294708},{"id":"https://openalex.org/C128942645","wikidata":"https://www.wikidata.org/wiki/Q1568346","display_name":"Test case","level":3,"score":0.2565999925136566},{"id":"https://openalex.org/C2778583558","wikidata":"https://www.wikidata.org/wiki/Q771245","display_name":"Code reuse","level":3,"score":0.2533999979496002},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.07122","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07122","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.07122","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07122","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"recently":[5],"shown":[6],"remarkable":[7],"progress":[8],"in":[9],"code":[10,218],"generation,":[11,219],"yet":[12],"their":[13],"ability":[14],"to":[15,80],"construct":[16],"complete":[17],"software":[18,202],"repositories":[19],"from":[20,74],"scratch":[21],"remains":[22],"poorly":[23],"understood.":[24],"A":[25],"fundamental":[26],"bottleneck":[27],"is":[28,79],"the":[29,62,102,181],"lack":[30],"of":[31,71],"verifiable":[32],"and":[33,53,130,137,159,175,200,213,220],"scalable":[34],"evaluation:":[35],"existing":[36,122],"benchmarks":[37],"either":[38],"focus":[39],"on":[40,45,143],"patch-based":[41],"editing":[42],"or":[43,47],"rely":[44],"human":[46],"LLM-based":[48,232],"judgments,":[49],"which":[50],"introduce":[51,134],"bias":[52],"limit":[54],"reproducibility.":[55],"In":[56],"this":[57,144],"work,":[58],"we":[59,133,146],"present":[60],"RepoZero,":[61],"first":[63],"benchmark":[64],"that":[65,98,154,179],"enables":[66],"fully":[67],"automated,":[68],"execution-based":[69],"verification":[70],"repository-level":[72,167],"generation":[73,82,158,225],"scratch.":[75],"Our":[76,205],"key":[77],"idea":[78],"reformulate":[81],"as":[83,209,226],"repository":[84,96],"reproduction:":[85],"given":[86],"only":[87,186],"API":[88],"specifications,":[89],"an":[90,94,148],"agent":[91,176],"must":[92],"re-implement":[93],"entire":[95],"such":[97],"its":[99],"behavior":[100],"matches":[101],"original":[103],"implementation.":[104],"This":[105],"design":[106],"allows":[107],"for":[108,166,216,230],"strict":[109],"black-box":[110],"validation":[111],"via":[112,223],"output":[113],"equivalence,":[114],"while":[115],"naturally":[116],"supporting":[117],"large-scale":[118],"construction":[119],"by":[120],"reusing":[121],"open-source":[123],"repositories.":[124],"To":[125],"further":[126],"mitigate":[127],"data":[128],"leakage":[129],"shortcut":[131],"solutions,":[132],"cross-language":[135],"constraints":[136],"a":[138,194,210,227],"sandboxed":[139],"evaluation":[140],"protocol.":[141],"Building":[142],"benchmark,":[145],"propose":[147],"Agentic":[149],"Code-Test":[150],"Evolution":[151],"(ACE)":[152],"framework":[153],"performs":[155],"iterative":[156],"test":[157,224],"error-driven":[160],"refinement,":[161],"enabling":[162],"effective":[163],"test-time":[164],"scaling":[165],"synthesis.":[168],"Extensive":[169],"experiments":[170],"across":[171],"multiple":[172],"state-of-the-art":[173],"LLMs":[174],"frameworks":[177],"reveal":[178],"even":[180],"strongest":[182],"LLM":[183],"agents":[184],"achieve":[185],"limited":[187],"pass":[188],"rates":[189],"(30\\%":[190],"-":[191],"55\\%),":[192],"exposing":[193],"substantial":[195],"gap":[196],"between":[197],"current":[198],"capabilities":[199],"real-world":[201],"development":[203],"requirements.":[204],"results":[206],"establish":[207],"RepoZero":[208],"challenging,":[211],"scalable,":[212],"reliable":[214],"testbed":[215],"end-to-end":[217],"highlight":[221],"self-verification":[222],"critical":[228],"direction":[229],"advancing":[231],"coding":[233],"agents.":[234]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-12T00:00:00"}
