{"id":"https://openalex.org/W7148353399","doi":"https://doi.org/10.48550/arxiv.2604.00239","title":"A Taxonomy of Programming Languages for Code Generation","display_name":"A Taxonomy of Programming Languages for Code Generation","publication_year":2026,"publication_date":"2026-03-31","ids":{"openalex":"https://openalex.org/W7148353399","doi":"https://doi.org/10.48550/arxiv.2604.00239"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.00239","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00239","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.00239","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132819656","display_name":"Nishat Raihan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Raihan, Nishat","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025493324","display_name":"Christian D. Newman","orcid":"https://orcid.org/0000-0002-8838-4074"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Newman, Christian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132805445","display_name":"Marcos Zampieri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zampieri, Marcos","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5132819656"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.272599995136261,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.272599995136261,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.1923999935388565,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.12280000001192093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.6700999736785889},{"id":"https://openalex.org/keywords/second-generation-programming-language","display_name":"Second-generation programming language","score":0.5989999771118164},{"id":"https://openalex.org/keywords/comparison-of-multi-paradigm-programming-languages","display_name":"Comparison of multi-paradigm programming languages","score":0.5234000086784363},{"id":"https://openalex.org/keywords/third-generation-programming-language","display_name":"Third-generation programming language","score":0.424699991941452},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.4214000105857849},{"id":"https://openalex.org/keywords/skew","display_name":"Skew","score":0.3537999987602234}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7753000259399414},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.6700999736785889},{"id":"https://openalex.org/C11164408","wikidata":"https://www.wikidata.org/wiki/Q18657800","display_name":"Second-generation programming language","level":4,"score":0.5989999771118164},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5246999859809875},{"id":"https://openalex.org/C74149592","wikidata":"https://www.wikidata.org/wiki/Q762446","display_name":"Comparison of multi-paradigm programming languages","level":5,"score":0.5234000086784363},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4327999949455261},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4291999936103821},{"id":"https://openalex.org/C206146517","wikidata":"https://www.wikidata.org/wiki/Q1294375","display_name":"Third-generation programming language","level":5,"score":0.424699991941452},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.4214000105857849},{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.3537999987602234},{"id":"https://openalex.org/C199305712","wikidata":"https://www.wikidata.org/wiki/Q3205914","display_name":"Fifth-generation programming language","level":3,"score":0.3005000054836273},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2874000072479248},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.2734000086784363},{"id":"https://openalex.org/C145628200","wikidata":"https://www.wikidata.org/wiki/Q238137","display_name":"Fourth-generation programming language","level":5,"score":0.2542000114917755},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.00239","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00239","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.00239","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00239","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"world's":[1],"7,000+":[2],"languages":[3,34,75,85,102],"vary":[4],"widely":[5],"in":[6,95],"the":[7,67],"availability":[8],"of":[9,22,53,84,92,101,111,139],"resources":[10],"for":[11,43,90,133],"NLP,":[12],"motivating":[13],"efforts":[14],"to":[15],"systematically":[16],"categorize":[17],"them":[18],"by":[19],"their":[20],"degree":[21],"resourcefulness":[23],"(Joshi":[24],"et":[25],"al.,":[26],"2020).":[27],"A":[28],"similar":[29],"disparity":[30],"exists":[31],"among":[32],"programming":[33],"(PLs);":[35],"however,":[36],"no":[37],"resource-tier":[38],"taxonomy":[39,58],"has":[40],"been":[41],"established":[42],"code.":[44],"As":[45],"large":[46],"language":[47],"models":[48],"(LLMs)":[49],"grow":[50],"increasingly":[51],"capable":[52],"generating":[54],"code,":[55],"such":[56],"a":[57,130],"becomes":[59],"essential.":[60],"To":[61],"fill":[62],"this":[63,120],"gap,":[64],"we":[65],"present":[66],"first":[68],"reproducible":[69],"PL":[70],"resource":[71],"classification,":[72],"grouping":[73],"646":[74],"into":[76],"four":[77],"tiers.":[78],"We":[79],"show":[80],"that":[81,119],"only":[82],"1.9%":[83],"(Tier":[86,103],"3,":[87],"High)":[88],"account":[89],"74.6%":[91],"all":[93],"tokens":[94],"seven":[96],"major":[97],"corpora,":[98],"while":[99],"71.7%":[100],"0,":[104],"Scarce)":[105],"contribute":[106],"just":[107],"1.0%.":[108],"Statistical":[109],"analyses":[110],"within-tier":[112],"inequality,":[113],"dispersion,":[114],"and":[115,125,136],"distributional":[116],"skew":[117],"confirm":[118],"imbalance":[121],"is":[122],"both":[123],"extreme":[124],"systematic.":[126],"Our":[127],"results":[128],"provide":[129],"principled":[131],"framework":[132],"dataset":[134],"curation":[135],"tier-aware":[137],"evaluation":[138],"multilingual":[140],"LLMs.":[141]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
