{"id":"https://openalex.org/W4281763794","doi":"https://doi.org/10.1145/3520312.3534862","title":"A systematic evaluation of large language models of code","display_name":"A systematic evaluation of large language models of code","publication_year":2022,"publication_date":"2022-06-10","ids":{"openalex":"https://openalex.org/W4281763794","doi":"https://doi.org/10.1145/3520312.3534862"},"language":"en","primary_location":{"id":"doi:10.1145/3520312.3534862","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3520312.3534862","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3520312.3534862","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th ACM SIGPLAN International Symposium on Machine Programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3520312.3534862","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038743835","display_name":"Frank F. Xu","orcid":"https://orcid.org/0000-0002-9662-7582"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Frank F. Xu","raw_affiliation_strings":["Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107243336","display_name":"Uri Alon","orcid":"https://orcid.org/0000-0001-5796-9988"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Uri Alon","raw_affiliation_strings":["Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068811427","display_name":"Graham Neubig","orcid":"https://orcid.org/0000-0002-2072-3789"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Graham Neubig","raw_affiliation_strings":["Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009679905","display_name":"Vincent J. Hellendoorn","orcid":"https://orcid.org/0000-0001-7516-0525"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vincent Josua Hellendoorn","raw_affiliation_strings":["Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5038743835"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":147.9336,"has_fulltext":true,"cited_by_count":486,"citation_normalized_percentile":{"value":0.99992385,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8490298986434937},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.7076215744018555},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.6789590120315552},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6556635499000549},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5028156638145447},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.47580844163894653},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4667425751686096},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46373167634010315},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4306923449039459},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.1828484833240509},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.07873255014419556}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8490298986434937},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.7076215744018555},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.6789590120315552},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6556635499000549},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5028156638145447},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.47580844163894653},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4667425751686096},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46373167634010315},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4306923449039459},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.1828484833240509},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.07873255014419556}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3520312.3534862","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3520312.3534862","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3520312.3534862","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th ACM SIGPLAN International Symposium on Machine Programming","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3520312.3534862","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3520312.3534862","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3520312.3534862","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th ACM SIGPLAN International Symposium on Machine Programming","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5799999833106995,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1197061222","display_name":"SHF: Small: Open-domain, Data-driven Code Synthesis from Natural Language","funder_award_id":"1815287","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5921281487","display_name":null,"funder_award_id":"number","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4281763794.pdf","grobid_xml":"https://content.openalex.org/works/W4281763794.grobid-xml"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W1655078475","https://openalex.org/W2143861926","https://openalex.org/W2344444819","https://openalex.org/W2740130862","https://openalex.org/W2914120296","https://openalex.org/W2962784628","https://openalex.org/W2963809228","https://openalex.org/W2978835257","https://openalex.org/W2979792666","https://openalex.org/W2998704965","https://openalex.org/W3011564318","https://openalex.org/W3170092793","https://openalex.org/W3170572542","https://openalex.org/W3198659451","https://openalex.org/W3198685994","https://openalex.org/W3212496002","https://openalex.org/W4200203799","https://openalex.org/W4286696249","https://openalex.org/W6680532216","https://openalex.org/W6767737316"],"related_works":["https://openalex.org/W2124842464","https://openalex.org/W2382657549","https://openalex.org/W4376877853","https://openalex.org/W2113128227","https://openalex.org/W632256878","https://openalex.org/W2491403535","https://openalex.org/W3081644756","https://openalex.org/W2479811461","https://openalex.org/W2104915799","https://openalex.org/W4311938462"],"abstract_inverted_index":{"Large":[0],"language":[1,19,96],"models":[2,82,158,163],"(LMs)":[3],"of":[4,49,56,108,119,140],"code":[5,13,16,25,141],"have":[6,181],"recently":[7],"shown":[8],"tremendous":[9],"promise":[10],"in":[11,47,87,105,177],"completing":[12],"and":[14,39,65,166,175],"synthesizing":[15],"from":[17],"natural":[18,95],"descriptions.":[20],"However,":[21],"the":[22,57,106,132,151],"current":[23],"state-of-the-art":[24],"LMs":[26],"(e.g.,":[27],"Codex)":[28],"are":[29,164],"not":[30,75],"publicly":[31,167],"available,":[32],"leaving":[33],"many":[34],"questions":[35],"about":[36],"their":[37],"model":[38,112],"data":[40],"design":[41],"decisions.":[42],"We":[43,98,121,180],"aim":[44],"to":[45],"fill":[46],"some":[48,88],"these":[50],"blanks":[51],"through":[52],"a":[53,109,116,123,147],"systematic":[54],"evaluation":[55],"largest":[58],"existing":[59,80],"models:":[60],"Codex,":[61],"GPT-J,":[62],"GPT-Neo,":[63],"GPT-NeoX-20B,":[64],"CodeParrot,":[66],"across":[67,142],"various":[68],"programming":[69,89,144,153],"languages.":[70],"Although":[71],"Codex":[72],"itself":[73],"is":[74],"open-source,":[76],"we":[77],"find":[78],"that":[79,135],"opensource":[81],"do":[83],"achieve":[84],"close":[85],"results":[86],"languages,":[90],"although":[91],"targeted":[92],"mainly":[93],"for":[94],"modeling.":[97],"further":[99],"identify":[100],"an":[101,182],"important":[102],"missing":[103],"piece":[104],"form":[107],"large":[110],"open-source":[111,165],"trained":[113,137,162],"exclusively":[114],"on":[115,131,138,146],"multi-lingual":[117],"corpus":[118],"code.":[120],"release":[122],"new":[124],"model,":[125],"PolyCoder,":[126],"with":[127],"2.7B":[128],"parameters":[129],"based":[130],"GPT-2":[133],"architecture,":[134],"was":[136],"249GB":[139],"12":[143],"languages":[145],"single":[148],"machine.":[149],"In":[150],"C":[152],"language,":[154],"PolyCoder":[155],"outperforms":[156],"all":[157],"including":[159],"Codex.":[160],"Our":[161],"available":[168],"at":[169,185],"https://github.com/VHellendoorn/Code-LMs,":[170],"which":[171],"enables":[172],"future":[173],"research":[174],"application":[176],"this":[178],"area.":[179],"online":[183],"appendix":[184],"https://arxiv.org/abs/2202.13169.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":22},{"year":2025,"cited_by_count":159},{"year":2024,"cited_by_count":199},{"year":2023,"cited_by_count":98},{"year":2022,"cited_by_count":8}],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2025-10-10T00:00:00"}
