{"id":"https://openalex.org/W2148190602","doi":"https://doi.org/10.1109/msr.2013.6624029","title":"Mining source code repositories at massive scale using language modeling","display_name":"Mining source code repositories at massive scale using language modeling","publication_year":2013,"publication_date":"2013-05-01","ids":{"openalex":"https://openalex.org/W2148190602","doi":"https://doi.org/10.1109/msr.2013.6624029","mag":"2148190602"},"language":"en","primary_location":{"id":"doi:10.1109/msr.2013.6624029","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msr.2013.6624029","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 10th Working Conference on Mining Software Repositories (MSR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/066b5203-acbe-4e09-8009-057677e3300f","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080221214","display_name":"Miltiadis Allamanis","orcid":"https://orcid.org/0000-0002-5819-9900"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Miltiadis Allamanis","raw_affiliation_strings":["School of Informatics, University of Edinburgh, Edinburgh, UK"],"affiliations":[{"raw_affiliation_string":"School of Informatics, University of Edinburgh, Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028501178","display_name":"Charles Sutton","orcid":"https://orcid.org/0000-0002-0041-3820"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Charles Sutton","raw_affiliation_strings":["School of Informatics, University of Edinburgh, Edinburgh, UK"],"affiliations":[{"raw_affiliation_string":"School of Informatics, University of Edinburgh, Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5080221214"],"corresponding_institution_ids":["https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":49.2987,"has_fulltext":false,"cited_by_count":322,"citation_normalized_percentile":{"value":0.99816998,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"207","last_page":"216"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.775733232498169},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.586025059223175},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5206331610679626},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5069158673286438},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.41704773902893066},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3609575033187866},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3259839117527008},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.07104223966598511}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.775733232498169},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.586025059223175},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5206331610679626},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5069158673286438},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.41704773902893066},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3609575033187866},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3259839117527008},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.07104223966598511},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/msr.2013.6624029","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msr.2013.6624029","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 10th Working Conference on Mining Software Repositories (MSR)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.295.2931","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.295.2931","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://homepages.inf.ed.ac.uk/csutton/publications/msr2013.pdf","raw_type":"text"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/066b5203-acbe-4e09-8009-057677e3300f","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/066b5203-acbe-4e09-8009-057677e3300f","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Allamanis, M & Sutton, C 2013, Mining source code repositories at massive scale using language modeling. in Mining Software Repositories (MSR), 2013 10th IEEE Working Conference on. Institute of Electrical and Electronics Engineers, pp. 207-216. https://doi.org/10.1109/MSR.2013.6624029","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/066b5203-acbe-4e09-8009-057677e3300f","is_oa":false,"landing_page_url":"http://hdl.handle.net/20.500.11820/066b5203-acbe-4e09-8009-057677e3300f","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:pure.ed.ac.uk:publications/066b5203-acbe-4e09-8009-057677e3300f","is_oa":true,"landing_page_url":"https://hdl.handle.net/20.500.11820/066b5203-acbe-4e09-8009-057677e3300f","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Allamanis, M & Sutton, C 2013, Mining source code repositories at massive scale using language modeling. in Mining Software Repositories (MSR), 2013 10th IEEE Working Conference on. Institute of Electrical and Electronics Engineers, pp. 207-216. https://doi.org/10.1109/MSR.2013.6624029","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/066b5203-acbe-4e09-8009-057677e3300f","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/066b5203-acbe-4e09-8009-057677e3300f","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Allamanis, M & Sutton, C 2013, Mining source code repositories at massive scale using language modeling. in Mining Software Repositories (MSR), 2013 10th IEEE Working Conference on. Institute of Electrical and Electronics Engineers, pp. 207-216. https://doi.org/10.1109/MSR.2013.6624029","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[{"score":0.5699999928474426,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W182831726","https://openalex.org/W1560914538","https://openalex.org/W1579838312","https://openalex.org/W1975257591","https://openalex.org/W2059621895","https://openalex.org/W2097927681","https://openalex.org/W2109144580","https://openalex.org/W2122414758","https://openalex.org/W2131723927","https://openalex.org/W2142403498","https://openalex.org/W2143960295","https://openalex.org/W2144517582","https://openalex.org/W2148702066","https://openalex.org/W2153943889","https://openalex.org/W2164961799","https://openalex.org/W2167830984","https://openalex.org/W2950186769","https://openalex.org/W3140103367","https://openalex.org/W3146720657","https://openalex.org/W6681198100"],"related_works":["https://openalex.org/W2003932708","https://openalex.org/W1967424056","https://openalex.org/W2284877871","https://openalex.org/W2138102289","https://openalex.org/W868043274","https://openalex.org/W2513523087","https://openalex.org/W1187916458","https://openalex.org/W2373861055","https://openalex.org/W1559716973","https://openalex.org/W3081644756"],"abstract_inverted_index":{"The":[0,79],"tens":[1],"of":[2,4,17,55,63,71,113,131,139,161],"thousands":[3],"high-quality":[5],"open":[6],"source":[7,27,56],"software":[8,19,103,144],"projects":[9],"on":[10,59,110,168],"the":[11,14,49,69,72,86,129,136],"Internet":[12],"raise":[13],"exciting":[15],"possibility":[16],"studying":[18],"development":[20],"by":[21,75],"finding":[22],"patterns":[23],"across":[24],"truly":[25],"large":[26,42,114],"code":[28,87,133],"repositories.":[29],"This":[30,65],"could":[31],"enable":[32],"new":[33,99,106,125],"tools":[34],"for":[35,101],"developing":[36],"code,":[37,57],"encouraging":[38],"reuse,":[39],"and":[40,135],"navigating":[41],"projects.":[43],"In":[44,146],"this":[45],"paper,":[46],"we":[47],"build":[48],"first":[50],"giga-token":[51,80],"probabilistic":[52],"language":[53],"model":[54,81],"based":[58,109,166],"352":[60],"million":[61],"lines":[62],"Java.":[64],"is":[66,82,149],"100":[67],"times":[68],"scale":[70],"pioneering":[73],"work":[74],"Hindle":[76],"et":[77],"al.":[78],"significantly":[83],"better":[84],"at":[85],"suggestion":[88],"task":[89],"than":[90],"previous":[91],"models.":[92],"More":[93],"broadly,":[94],"our":[95],"approach":[96],"provides":[97],"a":[98,132,140,143,162],"\u201clens\u201d":[100],"analyzing":[102],"projects,":[104],"enabling":[105],"complexity":[107,121,130],"metrics":[108,119,126],"statistical":[111],"analysis":[112],"corpora.":[115],"We":[116,123],"call":[117],"these":[118],"data-driven":[120],"metrics.":[122],"propose":[124],"that":[127,158],"measure":[128],"module":[134,141],"topical":[137],"centrality":[138],"to":[142,151],"project.":[145],"particular,":[147],"it":[148],"possible":[150],"distinguish":[152],"reusable":[153],"utility":[154],"classes":[155,157],"from":[156],"are":[159],"part":[160],"program's":[163],"core":[164],"logic":[165],"solely":[167],"general":[169],"information":[170],"theoretic":[171],"criteria.":[172]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":20},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":29},{"year":2022,"cited_by_count":24},{"year":2021,"cited_by_count":46},{"year":2020,"cited_by_count":39},{"year":2019,"cited_by_count":38},{"year":2018,"cited_by_count":27},{"year":2017,"cited_by_count":24},{"year":2016,"cited_by_count":19},{"year":2015,"cited_by_count":25},{"year":2014,"cited_by_count":14},{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
