{"id":"https://openalex.org/W2014877837","doi":"https://doi.org/10.1109/dapse.2013.6603797","title":"Building Statistical Language Models of code","display_name":"Building Statistical Language Models of code","publication_year":2013,"publication_date":"2013-05-01","ids":{"openalex":"https://openalex.org/W2014877837","doi":"https://doi.org/10.1109/dapse.2013.6603797","mag":"2014877837"},"language":"en","primary_location":{"id":"doi:10.1109/dapse.2013.6603797","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dapse.2013.6603797","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 1st International Workshop on Data Analysis Patterns in Software Engineering (DAPSE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008492426","display_name":"Peter Schulam","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peter Schulam","raw_affiliation_strings":["Language Technologies Institute, Carnegie Mellon University, USA","Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Language Technologies Institute, Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040906638","display_name":"Roni Rosenfeld","orcid":"https://orcid.org/0000-0002-3274-5862"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Roni Rosenfeld","raw_affiliation_strings":["Language Technologies Institute, Carnegie Mellon University, USA","Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Language Technologies Institute, Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036744986","display_name":"Pr\u00e9mkumar D\u00e9vanbu","orcid":"https://orcid.org/0000-0002-4346-5276"},"institutions":[{"id":"https://openalex.org/I84218800","display_name":"University of California, Davis","ror":"https://ror.org/05rrcem69","country_code":"US","type":"education","lineage":["https://openalex.org/I84218800"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Premkumar Devanbu","raw_affiliation_strings":["Department of Computer Science, University of California, Davis, USA","Dept. of Computer Science, University of California at Davis, Davis, CA, USA#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of California, Davis, USA","institution_ids":["https://openalex.org/I84218800"]},{"raw_affiliation_string":"Dept. of Computer Science, University of California at Davis, Davis, CA, USA#TAB#","institution_ids":["https://openalex.org/I84218800"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8362,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82491571,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"2","issue":null,"first_page":"1","last_page":"3"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8525916337966919},{"id":"https://openalex.org/keywords/cache-language-model","display_name":"Cache language model","score":0.708175539970398},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.6820861101150513},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.6319743394851685},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.608583390712738},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.585201621055603},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5613152980804443},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.49690988659858704},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.46106845140457153},{"id":"https://openalex.org/keywords/universal-networking-language","display_name":"Universal Networking Language","score":0.43193212151527405},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4308808445930481},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.42547211050987244},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4193767011165619},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3697947859764099},{"id":"https://openalex.org/keywords/comprehension-approach","display_name":"Comprehension approach","score":0.093706876039505}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8525916337966919},{"id":"https://openalex.org/C39608478","wikidata":"https://www.wikidata.org/wiki/Q5015979","display_name":"Cache language model","level":5,"score":0.708175539970398},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6820861101150513},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.6319743394851685},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.608583390712738},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.585201621055603},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5613152980804443},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.49690988659858704},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.46106845140457153},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.43193212151527405},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4308808445930481},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.42547211050987244},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4193767011165619},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3697947859764099},{"id":"https://openalex.org/C129353971","wikidata":"https://www.wikidata.org/wiki/Q5156949","display_name":"Comprehension approach","level":3,"score":0.093706876039505}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dapse.2013.6603797","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dapse.2013.6603797","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 1st International Workshop on Data Analysis Patterns in Software Engineering (DAPSE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8199999928474426,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1631260214","https://openalex.org/W2082092506","https://openalex.org/W2097927681","https://openalex.org/W2122947685","https://openalex.org/W2134237567","https://openalex.org/W2142403498","https://openalex.org/W2950186769","https://openalex.org/W3146720657","https://openalex.org/W4233559841","https://openalex.org/W6636811518"],"related_works":["https://openalex.org/W2394860946","https://openalex.org/W3213549959","https://openalex.org/W1508853749","https://openalex.org/W3129739276","https://openalex.org/W2183673572","https://openalex.org/W3075498906","https://openalex.org/W143910215","https://openalex.org/W3203715570","https://openalex.org/W2913520953","https://openalex.org/W1857365372"],"abstract_inverted_index":{"We":[0,72],"present":[1],"the":[2,61,77,90],"Source":[3],"Code":[4],"Statistical":[5,11],"Language":[6],"Model":[7],"data":[8,56],"analysis":[9,57],"pattern.":[10],"language":[12,25,42,50,66,98],"models":[13,43,67,99],"have":[14,86],"been":[15,87],"an":[16],"enabling":[17],"tool":[18,103],"for":[19,94],"a":[20,36,102],"wide":[21],"array":[22],"of":[23,63],"important":[24],"technologies.":[26],"Speech":[27],"recognition,":[28],"machine":[29],"translation,":[30],"and":[31],"document":[32],"summarization":[33],"(to":[34],"name":[35],"few)":[37],"all":[38],"rely":[39],"on":[40],"statistical":[41,97],"to":[44,48,82,109,111],"assign":[45],"probability":[46],"estimates":[47],"natural":[49,95],"utterances":[51],"or":[52],"sentences.":[53],"In":[54],"this":[55],"pattern,":[58],"we":[59],"describe":[60],"process":[62],"building":[64],"n-gram":[65],"over":[68,89],"software":[69,79],"source":[70],"files.":[71],"hope":[73],"that":[74,85,104],"by":[75],"introducing":[76],"empirical":[78],"engineering":[80],"community":[81],"best":[83],"practices":[84],"established":[88],"years":[91],"in":[92],"research":[93,114],"languages,":[96],"can":[100],"become":[101],"SE":[105],"researchers":[106],"are":[107],"able":[108],"use":[110],"explore":[112],"new":[113],"directions.":[115]},"counts_by_year":[{"year":2015,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
