{"id":"https://openalex.org/W7155782915","doi":"https://doi.org/10.48550/arxiv.2604.22659","title":"RealBench: A Repo-Level Code Generation Benchmark Aligned with Real-World Software Development Practices","display_name":"RealBench: A Repo-Level Code Generation Benchmark Aligned with Real-World Software Development Practices","publication_year":2026,"publication_date":"2026-04-24","ids":{"openalex":"https://openalex.org/W7155782915","doi":"https://doi.org/10.48550/arxiv.2604.22659"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.22659","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22659","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.22659","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134681549","display_name":"JIA LIN LI","orcid":"https://orcid.org/0000-0003-1274-3867"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111691137","display_name":"H. Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Hongyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134715842","display_name":"Yiran Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yiran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134682081","display_name":"Kechi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Kechi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134720971","display_name":"Tianqi Shao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shao, Tianqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134012719","display_name":"Tiankuo Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Tiankuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134689798","display_name":"Weinan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Weinan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134712852","display_name":"Zhi Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Zhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134685891","display_name":"Ge Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ge","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101234859","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0002-8331-7649"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113138275","display_name":"Yingtao Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Yingtao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134664909","display_name":"Yihong Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Yihong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.6909999847412109,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.6909999847412109,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.09380000084638596,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.054999999701976776,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.6592000126838684},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.5788000226020813},{"id":"https://openalex.org/keywords/kpi-driven-code-analysis","display_name":"KPI-driven code analysis","score":0.5131999850273132},{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural language generation","score":0.5008999705314636},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.44749999046325684},{"id":"https://openalex.org/keywords/unified-modeling-language","display_name":"Unified Modeling Language","score":0.4239000082015991},{"id":"https://openalex.org/keywords/code-review","display_name":"Code review","score":0.42329999804496765},{"id":"https://openalex.org/keywords/software-quality","display_name":"Software quality","score":0.37130001187324524},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3352999985218048}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6980000138282776},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.6592000126838684},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.6218000054359436},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.5788000226020813},{"id":"https://openalex.org/C121957198","wikidata":"https://www.wikidata.org/wiki/Q14365593","display_name":"KPI-driven code analysis","level":5,"score":0.5131999850273132},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.5008999705314636},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.45179998874664307},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.44749999046325684},{"id":"https://openalex.org/C145644426","wikidata":"https://www.wikidata.org/wiki/Q169411","display_name":"Unified Modeling Language","level":3,"score":0.4239000082015991},{"id":"https://openalex.org/C150292731","wikidata":"https://www.wikidata.org/wiki/Q1342704","display_name":"Code review","level":5,"score":0.42329999804496765},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.37130001187324524},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3352999985218048},{"id":"https://openalex.org/C180152950","wikidata":"https://www.wikidata.org/wiki/Q2904257","display_name":"Software development process","level":4,"score":0.32679998874664307},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.320499986410141},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.31869998574256897},{"id":"https://openalex.org/C509989072","wikidata":"https://www.wikidata.org/wiki/Q15188241","display_name":"Model-driven architecture","level":4,"score":0.29789999127388},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C186846655","wikidata":"https://www.wikidata.org/wiki/Q3398377","display_name":"Software construction","level":4,"score":0.2854999899864197},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.2849000096321106},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C184356942","wikidata":"https://www.wikidata.org/wiki/Q830382","display_name":"Best practice","level":2,"score":0.2816999852657318},{"id":"https://openalex.org/C4478048","wikidata":"https://www.wikidata.org/wiki/Q950250","display_name":"Test-driven development","level":4,"score":0.26840001344680786},{"id":"https://openalex.org/C146222976","wikidata":"https://www.wikidata.org/wiki/Q1204997","display_name":"Business logic","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.22659","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22659","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.22659","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22659","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4338991641998291}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Writing":[0],"code":[1,25,44,59,93,110,152,173,187],"requires":[2],"significant":[3,197],"time":[4],"and":[5,31,53,77,127,194,208,227],"effort":[6],"in":[7,50,167,212],"software":[8,98,117,178],"development.":[9],"To":[10,101],"automate":[11,97],"this":[12,103],"process,":[13],"researchers":[14],"have":[15,33],"made":[16],"substantial":[17],"progress":[18],"using":[19],"Large":[20],"Language":[21],"Models":[22],"(LLMs)":[23],"for":[24,171],"generation.":[26],"Many":[27],"benchmarks":[28,76],"like":[29],"HumanEval":[30],"EvoCodeBench":[32],"been":[34],"created":[35],"to":[36,42,225,257],"evaluate":[37],"LLMs":[38,189,203],"by":[39],"requiring":[40],"them":[41],"generate":[43],"from":[45],"natural":[46,69,124],"language":[47,70,125],"requirements.":[48],"However,":[49],"enterprise":[51],"applications":[52],"team":[54],"development,":[55],"developers":[56,135],"typically":[57,136],"write":[58],"based":[60],"on":[61,140,242],"structured":[62,158],"designs":[63],"or":[64],"specifications":[65],"rather":[66],"than":[67],"raw":[68],"descriptions.":[71],"This":[72],"gap":[73],"between":[74],"existing":[75],"real":[78],"industry":[79,116],"development":[80,99,118,179],"practices":[81],"means":[82],"that":[83,184],"current":[84,168],"benchmark":[85,112],"scores":[86],"may":[87],"not":[88],"accurately":[89],"reflect":[90],"how":[91,134],"much":[92,191],"generation":[94,111,153,174,240],"can":[95],"help":[96],"tasks.":[100],"address":[102],"gap,":[104],"we":[105,144,182],"propose":[106],"RealBench,":[107],"a":[108,146,247],"repository-level":[109],"aligned":[113,175],"with":[114,157,176,250],"real-world":[115,177],"practices.":[119,180],"Each":[120],"example":[121],"includes":[122],"both":[123],"requirements":[126],"UML":[128,213],"diagrams":[129],"as":[130],"system":[131,159],"design,":[132],"matching":[133],"receive":[137],"specifications.":[138],"Based":[139],"the":[141,216,232,238,251],"constructed":[142],"benchmarks,":[143],"conduct":[145],"systematic":[147],"evaluation":[148],"of":[149,218],"advanced":[150],"LLMs'":[151,169],"capabilities":[154,170],"when":[155],"provided":[156],"designs.":[160],"The":[161],"experimental":[162],"results":[163],"reveal":[164],"key":[165],"insights":[166],"repo-level":[172,186],"First,":[181],"notice":[183],"regarding":[185],"generation,":[188],"show":[190],"worse":[192],"performance":[193,198],"there":[195],"are":[196,204],"gaps":[199],"among":[200],"LLMs.":[201],"Second,":[202],"good":[205],"at":[206,235],"finding":[207],"creating":[209],"modules":[210,220],"defined":[211],"diagrams,":[214],"but":[215],"quality":[217],"generated":[219],"is":[221,237],"often":[222],"poor":[223],"due":[224],"grammar":[226],"logic":[228],"errors.":[229],"Third,":[230],"generating":[231,246],"entire":[233],"repository":[234,249],"once":[236],"best":[239],"strategy":[241,253],"smaller":[243],"repositories,":[244],"while":[245],"complex":[248],"module-by-module":[252],"works":[254],"better":[255],"compared":[256],"other":[258],"strategies.":[259]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-28T00:00:00"}
