{"id":"https://openalex.org/W4403223094","doi":"https://doi.org/10.1145/3689735","title":"Knowledge Transfer from High-Resource to Low-Resource Programming Languages for Code LLMs","display_name":"Knowledge Transfer from High-Resource to Low-Resource Programming Languages for Code LLMs","publication_year":2024,"publication_date":"2024-10-08","ids":{"openalex":"https://openalex.org/W4403223094","doi":"https://doi.org/10.1145/3689735"},"language":"en","primary_location":{"id":"doi:10.1145/3689735","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3689735","pdf_url":null,"source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1145/3689735","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061118771","display_name":"Federico Cassano","orcid":"https://orcid.org/0000-0002-9318-7454"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Federico Cassano","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0002-9318-7454","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002038171","display_name":"John Gouwar","orcid":"https://orcid.org/0000-0003-0494-7245"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John Gouwar","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0003-0494-7245","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008397042","display_name":"Francesca Lucchetti","orcid":"https://orcid.org/0009-0002-5837-6097"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Francesca Lucchetti","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0009-0002-5837-6097","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102783760","display_name":"Claire Schlesinger","orcid":"https://orcid.org/0009-0000-2533-1242"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Claire Schlesinger","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0009-0000-2533-1242","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108591760","display_name":"Anders Freeman","orcid":null},"institutions":[{"id":"https://openalex.org/I189731429","display_name":"Wellesley College","ror":"https://ror.org/01srpnj69","country_code":"US","type":"education","lineage":["https://openalex.org/I189731429"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anders Freeman","raw_affiliation_strings":["Wellesley College, Wellesley, USA"],"raw_orcid":"https://orcid.org/0009-0005-1904-6193","affiliations":[{"raw_affiliation_string":"Wellesley College, Wellesley, USA","institution_ids":["https://openalex.org/I189731429"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075492610","display_name":"Carolyn Jane Anderson","orcid":"https://orcid.org/0000-0001-5717-4210"},"institutions":[{"id":"https://openalex.org/I189731429","display_name":"Wellesley College","ror":"https://ror.org/01srpnj69","country_code":"US","type":"education","lineage":["https://openalex.org/I189731429"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carolyn Jane Anderson","raw_affiliation_strings":["Wellesley College, Wellesley, USA"],"raw_orcid":"https://orcid.org/0000-0001-5717-4210","affiliations":[{"raw_affiliation_string":"Wellesley College, Wellesley, USA","institution_ids":["https://openalex.org/I189731429"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010858670","display_name":"Molly Q Feldman","orcid":"https://orcid.org/0000-0002-5222-7720"},"institutions":[{"id":"https://openalex.org/I70571728","display_name":"Oberlin College","ror":"https://ror.org/05ac26z88","country_code":"US","type":"education","lineage":["https://openalex.org/I70571728"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Molly Q Feldman","raw_affiliation_strings":["Oberlin College, Oberlin, USA"],"raw_orcid":"https://orcid.org/0000-0002-5222-7720","affiliations":[{"raw_affiliation_string":"Oberlin College, Oberlin, USA","institution_ids":["https://openalex.org/I70571728"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075185353","display_name":"Michael Greenberg","orcid":"https://orcid.org/0000-0003-0014-7670"},"institutions":[{"id":"https://openalex.org/I108468826","display_name":"Stevens Institute of Technology","ror":"https://ror.org/02z43xh36","country_code":"US","type":"education","lineage":["https://openalex.org/I108468826"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Greenberg","raw_affiliation_strings":["Stevens Institute of Technology, Hoboken, USA"],"raw_orcid":"https://orcid.org/0000-0003-0014-7670","affiliations":[{"raw_affiliation_string":"Stevens Institute of Technology, Hoboken, USA","institution_ids":["https://openalex.org/I108468826"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034240415","display_name":"Abhinav Jangda","orcid":"https://orcid.org/0000-0002-4849-6776"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abhinav Jangda","raw_affiliation_strings":["Microsoft Research, Redmond, USA"],"raw_orcid":"https://orcid.org/0000-0002-4849-6776","affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044042294","display_name":"Arjun Guha","orcid":"https://orcid.org/0000-0002-7493-3271"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arjun Guha","raw_affiliation_strings":["Northeastern University, Northeastern, USA","Roblox, San Mateo, USA","Northeastern University, Northeastern, USA / Roblox, San Mateo, USA"],"raw_orcid":"https://orcid.org/0000-0002-7493-3271","affiliations":[{"raw_affiliation_string":"Northeastern University, Northeastern, USA","institution_ids":["https://openalex.org/I12912129"]},{"raw_affiliation_string":"Roblox, San Mateo, USA","institution_ids":[]},{"raw_affiliation_string":"Northeastern University, Northeastern, USA / Roblox, San Mateo, USA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5061118771"],"corresponding_institution_ids":["https://openalex.org/I12912129"],"apc_list":null,"apc_paid":null,"fwci":12.8075,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.99477287,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"8","issue":"OOPSLA2","first_page":"677","last_page":"708"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.632860004901886},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5726326704025269},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.49653154611587524},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.35239070653915405}],"concepts":[{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.632860004901886},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5726326704025269},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49653154611587524},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.35239070653915405},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3689735","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3689735","pdf_url":null,"source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3689735","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3689735","pdf_url":null,"source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3899547837","display_name":null,"funder_award_id":"SES-2326173, SES-2326174, SES-2326175","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W2036676782","https://openalex.org/W2108325777","https://openalex.org/W2440056063","https://openalex.org/W3045595218","https://openalex.org/W3102273185","https://openalex.org/W3104449953","https://openalex.org/W3129831491","https://openalex.org/W4281669078","https://openalex.org/W4284688961","https://openalex.org/W4312282218","https://openalex.org/W4321013654","https://openalex.org/W4366204357","https://openalex.org/W4376167329","https://openalex.org/W4382239980","https://openalex.org/W4384304865","https://openalex.org/W4385562549","https://openalex.org/W4388556611","https://openalex.org/W4389104713","https://openalex.org/W4389988840","https://openalex.org/W4391272793","https://openalex.org/W4394745423","https://openalex.org/W4394769102","https://openalex.org/W4394769544","https://openalex.org/W6852746770"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W4402327032","https://openalex.org/W2382290278"],"abstract_inverted_index":{"Over":[0],"the":[1,39,100,150,189,192,211,229,236,240,259,299,369],"past":[2],"few":[3],"years,":[4],"Large":[5],"Language":[6],"Models":[7],"of":[8,41,102,206,216,279,281,348,364],"Code":[9,22,46,53,103,132,157,185,308,351],"(Code":[10],"LLMs)":[11],"have":[12,15,81,266],"started":[13],"to":[14,128,159,187,196,227,239,246,276,320,372,389,396,411,413],"a":[16,45,156,167,184,197,204,224,255],"significant":[17],"impact":[18],"on":[19,58,105,368],"programming":[20,33,51,60],"practice.":[21],"LLMs":[23,54,104],"are":[24,63,219],"also":[25,376],"emerging":[26],"as":[27,298,426],"building":[28],"blocks":[29],"for":[30,98,120,146,163,286,353,380,401],"research":[31],"in":[32,66,149,210,233,258],"languages":[34,61,79,107,142,148],"and":[35,89,175,294,328,350,358,387,416,421],"software":[36],"engineering.":[37],"However,":[38],"quality":[40],"code":[42,165,176,190,373],"produced":[43],"by":[44,50],"LLM":[47,158,186,309],"varies":[48],"significantly":[49,418],"language.":[52,200,302],"produce":[55],"impressive":[56],"results":[57],"high-resource":[59,141,168,193,301],"that":[62,80,331,360,391],"well":[64],"represented":[65],"their":[67],"training":[68,83,138,144,208,256,284,313,427],"data":[69,84,139,145,209,314],"(e.g.,":[70,86],"Java,":[71],"Python,":[72],"or":[73],"JavaScript),":[74],"but":[75,214],"struggle":[76],"with":[77,177,269,311,340],"low-resource":[78,106,121,147,199,261,288,402],"limited":[82],"available":[85],"OCaml,":[87,292,356],"Racket,":[88,295],"several":[90],"others).":[91],"This":[92,201],"paper":[93],"presents":[94],"an":[95,306],"effective":[96,422],"approach":[97,275,408],"boosting":[99],"performance":[101],"using":[108,296],"semi-synthetic":[109],"data.":[110],"Our":[111],"approach,":[112],"called":[113],"M":[114,134,341,392,405],"ulti":[115,135,342,393,406],"PL-T,":[116,343],"generates":[117],"high-quality":[118],"datasets":[119,338],"languages,":[122,415],"which":[123,243,317],"can":[124],"then":[125],"be":[126,335],"used":[127],"fine-tune":[129],"any":[130],"pretrained":[131],"LLM.":[133],"PL-T":[136,394,407],"translates":[137],"from":[140,166,191,235],"into":[143],"following":[151],"way.":[152],"1)":[153],"We":[154,182,222,272,375],"use":[155,183,223,305],"synthesize":[160],"unit":[161],"tests":[162,174],"commented":[164],"source":[169,194,237,300],"language,":[170,213,242],"filtering":[171],"out":[172],"faulty":[173],"low":[178],"test":[179,230,270],"coverage.":[180],"2)":[181],"translate":[188],"language":[195,238,262,371],"target":[198,212,241,260],"gives":[202],"us":[203,245,319],"corpus":[205,257],"candidate":[207],"many":[215],"these":[217,365],"translations":[218],"wrong.":[220],"3)":[221],"lightweight":[225],"compiler":[226],"compile":[228],"cases":[231],"generated":[232,339],"(1)":[234],"allows":[244,318],"filter":[247],"our":[248],"obviously":[249],"wrong":[250],"translations.":[251],"The":[252,404],"result":[253],"is":[254,409,417],"where":[263],"all":[264],"items":[265,285],"been":[267],"validated":[268,283],"cases.":[271],"apply":[273,412],"this":[274],"generate":[277],"tens":[278],"thousands":[280],"new,":[282],"five":[287],"languages:":[289],"Julia,":[290,354],"Lua,":[291,355],"R,":[293,357],"Python":[297],"Furthermore,":[303],"we":[304,344],"open":[307,312],"(StarCoderBase)":[310],"(The":[315],"Stack),":[316],"decontaminate":[321],"benchmarks,":[322],"train":[323],"models":[324,367],"without":[325],"violating":[326],"licenses,":[327],"run":[329],"experiments":[330],"could":[332],"not":[333],"otherwise":[334],"done.":[336],"Using":[337],"present":[345,377],"fine-tuned":[346],"versions":[347],"StarCoderBase":[349],"Llama":[352],"Racket":[359,378],"outperform":[361,397],"other":[362,398],"fine-tunes":[363,379],"base":[366],"natural":[370],"task.":[374],"two":[381],"very":[382],"recent":[383],"models,":[384],"DeepSeek":[385],"Coder":[386],"StarCoder2,":[388],"show":[390],"continues":[395],"fine-tuning":[399],"approaches":[400],"languages.":[403],"easy":[410],"new":[414],"more":[419],"efficient":[420],"than":[423],"alternatives":[424],"such":[425],"longer.":[428]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":1}],"updated_date":"2026-02-07T06:11:34.122080","created_date":"2025-10-10T00:00:00"}
