{"id":"https://openalex.org/W4411267085","doi":"https://doi.org/10.1145/3729315","title":"Scalable, Validated Code Translation of Entire Projects using Large Language Models","display_name":"Scalable, Validated Code Translation of Entire Projects using Large Language Models","publication_year":2025,"publication_date":"2025-06-10","ids":{"openalex":"https://openalex.org/W4411267085","doi":"https://doi.org/10.1145/3729315"},"language":"en","primary_location":{"id":"doi:10.1145/3729315","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3729315","pdf_url":null,"source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1145/3729315","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103036161","display_name":"Hanliang Zhang","orcid":"https://orcid.org/0000-0003-3309-0439"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Hanliang Zhang","raw_affiliation_strings":["University of Bristol, Bristol, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Bristol, Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072292925","display_name":"Cristina David","orcid":"https://orcid.org/0000-0002-9106-934X"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Cristina David","raw_affiliation_strings":["University of Bristol, Bristol, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Bristol, Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074307489","display_name":"Meng Wang","orcid":"https://orcid.org/0000-0001-7780-630X"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Meng Wang","raw_affiliation_strings":["University of Bristol, Bristol, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Bristol, Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042421417","display_name":"Brandon Paulsen","orcid":"https://orcid.org/0000-0001-7790-6570"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brandon Paulsen","raw_affiliation_strings":["Amazon, Arlington, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Arlington, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086206346","display_name":"Daniel Kroening","orcid":"https://orcid.org/0000-0002-6681-5283"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Kroening","raw_affiliation_strings":["Amazon, Seattle, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Seattle, USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5103036161"],"corresponding_institution_ids":["https://openalex.org/I36234482"],"apc_list":null,"apc_paid":null,"fwci":21.887,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.99210753,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"9","issue":"PLDI","first_page":"1616","last_page":"1641"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.6911446452140808},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6823240518569946},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.6693075895309448},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6453386545181274},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5628228187561035},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.43741557002067566},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.15400537848472595}],"concepts":[{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.6911446452140808},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6823240518569946},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.6693075895309448},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6453386545181274},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5628228187561035},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43741557002067566},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.15400537848472595},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3729315","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3729315","pdf_url":null,"source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},{"id":"pmh:oai:research-information.bris.ac.uk:publications/cdf4c654-e3f7-4f20-b27e-e21d8d68f6aa","is_oa":true,"landing_page_url":"https://hdl.handle.net/1983/cdf4c654-e3f7-4f20-b27e-e21d8d68f6aa","pdf_url":"https://research-information.bris.ac.uk/files/460590511/3729315.pdf","source":{"id":"https://openalex.org/S7407055359","display_name":"Explore Bristol Research","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Zhang, H, David, C, Wang, M, Paulsen, B & Kroening, D 2025, Scalable, Validated Code Translation of Entire Projects using Large Language Models. in M Hicks (ed.), Proceedings of the ACM on Programming Languages : Issue PLDI. vol. 9, 212, Proceedings of the ACM on Programming Languages, Association for Computing Machinery, New York, pp. 1616-1641. https://doi.org/10.1145/3729315","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:research-information.bris.ac.uk:openaire/cdf4c654-e3f7-4f20-b27e-e21d8d68f6aa","is_oa":true,"landing_page_url":"https://research-information.bris.ac.uk/en/publications/cdf4c654-e3f7-4f20-b27e-e21d8d68f6aa","pdf_url":null,"source":{"id":"https://openalex.org/S4306400895","display_name":"Bristol Research (University of Bristol)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I36234482","host_organization_name":"University of Bristol","host_organization_lineage":["https://openalex.org/I36234482"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Zhang, H, David, C, Wang, M, Paulsen, B & Kroening, D 2025, Scalable, Validated Code Translation of Entire Projects using Large Language Models. in M Hicks (ed.), Proceedings of the ACM on Programming Languages : Issue PLDI. vol. 9, 212, Proceedings of the ACM on Programming Languages, Association for Computing Machinery, New York, pp. 1616-1641. https://doi.org/10.1145/3729315","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"doi:10.1145/3729315","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3729315","pdf_url":null,"source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1994649009","https://openalex.org/W2040856861","https://openalex.org/W2122103021","https://openalex.org/W2125152082","https://openalex.org/W2140697469","https://openalex.org/W2152565783","https://openalex.org/W2279657578","https://openalex.org/W2701082322","https://openalex.org/W2888824816","https://openalex.org/W2911450990","https://openalex.org/W2963804422","https://openalex.org/W3000442391","https://openalex.org/W3091157996","https://openalex.org/W3205049031","https://openalex.org/W4245120671","https://openalex.org/W4281974386","https://openalex.org/W4384345708","https://openalex.org/W4384471416","https://openalex.org/W4385571480","https://openalex.org/W4388502443","https://openalex.org/W4389159862","https://openalex.org/W4389438938","https://openalex.org/W4389518960","https://openalex.org/W4389520032","https://openalex.org/W4394769102","https://openalex.org/W4399214196","https://openalex.org/W4400600893","https://openalex.org/W4408124887","https://openalex.org/W4411551908","https://openalex.org/W6893389937"],"related_works":["https://openalex.org/W2389214306","https://openalex.org/W2965083567","https://openalex.org/W4235240664","https://openalex.org/W1838576100","https://openalex.org/W2095886385","https://openalex.org/W2889616422","https://openalex.org/W2089704382","https://openalex.org/W1983399550","https://openalex.org/W97075385","https://openalex.org/W2100349471"],"abstract_inverted_index":{"Large":[0],"language":[1,97,152],"models":[2],"(LLMs)":[3],"show":[4],"promise":[5],"in":[6,35,116,149],"code":[7,25,40,60,63,210],"translation":[8,26,36,140,144],"due":[9],"to":[10,13,54,105,121,145,172,187,192,206],"their":[11],"ability":[12],"generate":[14,199],"idiomatic":[15],"code.":[16],"However,":[17],"a":[18,33,51,102],"significant":[19],"limitation":[20,48],"when":[21,91,119],"using":[22],"LLMs":[23,88],"for":[24,39,203,223],"is":[27,82],"scalability:":[28],"existing":[29,230],"works":[30],"have":[31,101],"shown":[32],"drop":[34],"success":[37],"rates":[38],"exceeding":[41],"around":[42],"100":[43],"lines.":[44],"We":[45,183],"overcome":[46],"this":[47,80],"by":[49,75],"developing":[50],"modular":[52],"approach":[53,81,186],"translation,":[55],"where":[56],"we":[57,85,128,196],"partition":[58],"the":[59,95,106,111,147,168,178],"into":[61],"small":[62],"fragments":[64],"which":[65,137,163],"can":[66,197],"be":[67],"translated":[68],"independently":[69],"and":[70,109,154,159,211],"semantically":[71,156],"validated":[72,222],"(that":[73],"is,":[74],"checking":[76],"I/O":[77,224],"equivalence).":[78],"When":[79],"applied":[83],"naively,":[84],"discover":[86],"that":[87,98,110,195],"are":[89],"unreliable":[90],"translating":[92,188],"features":[93],"of":[94,180,209,217,219],"source":[96],"do":[99],"not":[100],"direct":[103],"mapping":[104,135],"target":[107],"language,":[108],"LLM":[112,148],"often":[113],"gets":[114],"stuck":[115],"repair":[117],"loops":[118],"attempting":[120],"fix":[122],"errors.":[123],"To":[124],"address":[125],"these":[126],"issues,":[127],"introduce":[129],"two":[130],"key":[131],"concepts:":[132],"(1)":[133],"feature":[134],",":[136,162],"integrates":[138],"predefined":[139],"rules":[141],"with":[142,214],"LLM-based":[143],"guide":[146],"navigating":[150],"subtle":[151],"differences":[153],"producing":[155],"accurate":[157],"code;":[158],"(2)":[160],"type-compatibility":[161],"facilitates":[164],"localized":[165],"checks":[166],"at":[167],"function":[169],"signature":[170],"level":[171],"detect":[173],"errors":[174],"early,":[175],"thereby":[176],"narrowing":[177],"scope":[179],"potential":[181],"repairs.":[182],"apply":[184],"our":[185],"real-world":[189],"Go":[190],"codebases":[191],"Rust,":[193],"demonstrating":[194],"consistently":[198],"reliable":[200],"Rust":[201],"translations":[202],"projects":[204],"up":[205],"9,700":[207],"lines":[208],"780":[212],"functions,":[213],"an":[215],"average":[216],"73%":[218],"functions":[220],"successfully":[221],"equivalence,":[225],"considerably":[226],"higher":[227],"than":[228],"any":[229],"work.":[231]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6}],"updated_date":"2026-04-07T14:57:38.498316","created_date":"2025-10-10T00:00:00"}
