{"id":"https://openalex.org/W3003634130","doi":"https://doi.org/10.1145/3468264.3468606","title":"Authorship attribution of source code: a language-agnostic approach and applicability in software engineering","display_name":"Authorship attribution of source code: a language-agnostic approach and applicability in software engineering","publication_year":2021,"publication_date":"2021-08-18","ids":{"openalex":"https://openalex.org/W3003634130","doi":"https://doi.org/10.1145/3468264.3468606","mag":"3003634130"},"language":"en","primary_location":{"id":"doi:10.1145/3468264.3468606","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3468264.3468606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088461870","display_name":"Egor Bogomolov","orcid":"https://orcid.org/0000-0002-3499-2402"},"institutions":[{"id":"https://openalex.org/I118501908","display_name":"National Research University Higher School of Economics","ror":"https://ror.org/055f7t516","country_code":"RU","type":"education","lineage":["https://openalex.org/I118501908"]}],"countries":["RU"],"is_corresponding":true,"raw_author_name":"Egor Bogomolov","raw_affiliation_strings":["JetBrains Research, Russia / HSE University, Russia","JetBrains Research and Higher School of Economics, Saint Petersburg, Russia"],"affiliations":[{"raw_affiliation_string":"JetBrains Research, Russia / HSE University, Russia","institution_ids":["https://openalex.org/I118501908"]},{"raw_affiliation_string":"JetBrains Research and Higher School of Economics, Saint Petersburg, Russia","institution_ids":["https://openalex.org/I118501908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005183323","display_name":"Vladimir Kovalenko","orcid":"https://orcid.org/0000-0001-5880-7323"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vladimir Kovalenko","raw_affiliation_strings":["JetBrains Research, Netherlands","JetBrains Research and JetBrains N.V., Amsterdam, The Netherlands"],"affiliations":[{"raw_affiliation_string":"JetBrains Research, Netherlands","institution_ids":[]},{"raw_affiliation_string":"JetBrains Research and JetBrains N.V., Amsterdam, The Netherlands","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023814629","display_name":"Yurii Rebryk","orcid":null},"institutions":[{"id":"https://openalex.org/I118501908","display_name":"National Research University Higher School of Economics","ror":"https://ror.org/055f7t516","country_code":"RU","type":"education","lineage":["https://openalex.org/I118501908"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Yurii Rebryk","raw_affiliation_strings":["HSE University, Russia","[Higher School of Economics]"],"affiliations":[{"raw_affiliation_string":"HSE University, Russia","institution_ids":["https://openalex.org/I118501908"]},{"raw_affiliation_string":"[Higher School of Economics]","institution_ids":["https://openalex.org/I118501908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082720005","display_name":"Alberto Bacchelli","orcid":"https://orcid.org/0000-0003-0193-6823"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Alberto Bacchelli","raw_affiliation_strings":["University of Zurich, Switzerland","University of Zurich Z\u00fcrich, Switzerland"],"affiliations":[{"raw_affiliation_string":"University of Zurich, Switzerland","institution_ids":["https://openalex.org/I202697423"]},{"raw_affiliation_string":"University of Zurich Z\u00fcrich, Switzerland","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065316896","display_name":"Timofey Bryksin","orcid":"https://orcid.org/0000-0001-9022-3563"},"institutions":[{"id":"https://openalex.org/I118501908","display_name":"National Research University Higher School of Economics","ror":"https://ror.org/055f7t516","country_code":"RU","type":"education","lineage":["https://openalex.org/I118501908"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Timofey Bryksin","raw_affiliation_strings":["JetBrains Research, Russia / HSE University, Russia","JetBrains Research and Higher School of Economics, Saint Petersburg, Russia"],"affiliations":[{"raw_affiliation_string":"JetBrains Research, Russia / HSE University, Russia","institution_ids":["https://openalex.org/I118501908"]},{"raw_affiliation_string":"JetBrains Research and Higher School of Economics, Saint Petersburg, Russia","institution_ids":["https://openalex.org/I118501908"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5088461870"],"corresponding_institution_ids":["https://openalex.org/I118501908"],"apc_list":null,"apc_paid":null,"fwci":0.2844,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.57452184,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"932","last_page":"944"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/attribution","display_name":"Attribution","score":0.8607946634292603},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7511958479881287},{"id":"https://openalex.org/keywords/authorship-attribution","display_name":"Authorship attribution","score":0.74901282787323},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.6582578420639038},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.624664306640625},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.5609064102172852},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5499793291091919},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.514120876789093},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.44351404905319214},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25198066234588623},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.18557977676391602},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.17048212885856628},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.06526139378547668}],"concepts":[{"id":"https://openalex.org/C143299363","wikidata":"https://www.wikidata.org/wiki/Q900584","display_name":"Attribution","level":2,"score":0.8607946634292603},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7511958479881287},{"id":"https://openalex.org/C3020202489","wikidata":"https://www.wikidata.org/wiki/Q2032038","display_name":"Authorship attribution","level":2,"score":0.74901282787323},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.6582578420639038},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.624664306640625},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5609064102172852},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5499793291091919},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.514120876789093},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.44351404905319214},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25198066234588623},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.18557977676391602},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.17048212885856628},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.06526139378547668},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/3468264.3468606","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3468264.3468606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","raw_type":"proceedings-article"},{"id":"pmh:oai:www.zora.uzh.ch:232070","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401281","display_name":"Zurich Open Repository and Archive (University of Zurich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I202697423","host_organization_name":"University of Zurich","host_organization_lineage":["https://openalex.org/I202697423"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Bogomolov, Egor; Kovalenko, Vladimir; Rebryk, Yurii; Bacchelli, Alberto; Bryksin, Timofey  (2021). Authorship attribution of source code: a language-agnostic approach and applicability in software engineering.  In: ESEC/FSE '21: 29th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering, Athens Greece, 23 September 2021 - 28 September 2021. ACM, 932-944.","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:arXiv.org:2001.11593","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.11593","pdf_url":"https://arxiv.org/pdf/2001.11593","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2001.11593","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2001.11593","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"doi:10.5167/uzh-232070","is_oa":true,"landing_page_url":"https://doi.org/10.5167/uzh-232070","pdf_url":null,"source":{"id":"https://openalex.org/S7407051291","display_name":"Universit\u00e4t Z\u00fcrich, ZORA","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"pmh:oai:www.zora.uzh.ch:232070","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401281","display_name":"Zurich Open Repository and Archive (University of Zurich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I202697423","host_organization_name":"University of Zurich","host_organization_lineage":["https://openalex.org/I202697423"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Bogomolov, Egor; Kovalenko, Vladimir; Rebryk, Yurii; Bacchelli, Alberto; Bryksin, Timofey  (2021). Authorship attribution of source code: a language-agnostic approach and applicability in software engineering.  In: ESEC/FSE '21: 29th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering, Athens Greece, 23 September 2021 - 28 September 2021. ACM, 932-944.","raw_type":"Conference or Workshop Item"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.44999998807907104,"id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G2663829463","display_name":"Enhanced Code Review: Using Context and Learning from Review Experience","funder_award_id":"197227","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G4836148144","display_name":null,"funder_award_id":"20002","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8584254424","display_name":null,"funder_award_id":"200021_197227","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G993971353","display_name":null,"funder_award_id":"200021","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320320924","display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","ror":"https://ror.org/00yjd3n13"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W54929040","https://openalex.org/W90413272","https://openalex.org/W1463623766","https://openalex.org/W1529790664","https://openalex.org/W1576040511","https://openalex.org/W1956188504","https://openalex.org/W1984141355","https://openalex.org/W1985994919","https://openalex.org/W1995875735","https://openalex.org/W2007209062","https://openalex.org/W2010452235","https://openalex.org/W2065489029","https://openalex.org/W2069985874","https://openalex.org/W2072164438","https://openalex.org/W2076279155","https://openalex.org/W2078111377","https://openalex.org/W2079317829","https://openalex.org/W2091543666","https://openalex.org/W2101234009","https://openalex.org/W2102833942","https://openalex.org/W2107697055","https://openalex.org/W2110127053","https://openalex.org/W2119410041","https://openalex.org/W2122219434","https://openalex.org/W2127190390","https://openalex.org/W2133564696","https://openalex.org/W2140609933","https://openalex.org/W2140952846","https://openalex.org/W2145574830","https://openalex.org/W2146500806","https://openalex.org/W2150855759","https://openalex.org/W2152195021","https://openalex.org/W2167117640","https://openalex.org/W2167135271","https://openalex.org/W2374812233","https://openalex.org/W2542890519","https://openalex.org/W2543580944","https://openalex.org/W2547213717","https://openalex.org/W2740860932","https://openalex.org/W2742956140","https://openalex.org/W2762844179","https://openalex.org/W2765254708","https://openalex.org/W2783268209","https://openalex.org/W2795150841","https://openalex.org/W2810627707","https://openalex.org/W2890081359","https://openalex.org/W2914961791","https://openalex.org/W2951861246","https://openalex.org/W2955127311","https://openalex.org/W2963541700","https://openalex.org/W2964150020","https://openalex.org/W2997591727","https://openalex.org/W3005951744","https://openalex.org/W3089853590","https://openalex.org/W3126076904","https://openalex.org/W3144106047","https://openalex.org/W4239439500","https://openalex.org/W4252684946"],"related_works":["https://openalex.org/W2146591353","https://openalex.org/W2771482031","https://openalex.org/W1487417480","https://openalex.org/W2863352484","https://openalex.org/W1555486186","https://openalex.org/W2170190737","https://openalex.org/W2072828027","https://openalex.org/W2138278076","https://openalex.org/W2953828878","https://openalex.org/W3163107682","https://openalex.org/W3130662425","https://openalex.org/W3103032226","https://openalex.org/W998146211","https://openalex.org/W1727330127","https://openalex.org/W3174696116","https://openalex.org/W2545327224","https://openalex.org/W1994602603","https://openalex.org/W1439900778","https://openalex.org/W2495326278","https://openalex.org/W1861959267"],"abstract_inverted_index":{"Authorship":[0],"attribution":[1,24,58,102,128],"(i.e.,":[2],"determining":[3],"who":[4],"is":[5,14],"the":[6,22,29,122,133],"author":[7],"of":[8,11,59,66,100,126],"a":[9,52,75],"piece":[10],"source":[12,60],"code)":[13],"an":[15],"established":[16],"research":[17,134],"topic.":[18],"State-of-the-art":[19],"results":[20],"for":[21,28,70,87,121,140],"authorship":[23,57,71,101,127],"problem":[25],"look":[26],"promising":[27],"software":[30,92,141],"engineering":[31],"field,":[32],"where":[33],"they":[34,110],"could":[35,131],"be":[36],"applied":[37],"to":[38,56,137],"detect":[39],"plagiarized":[40],"code":[41],"and":[42,73,124],"prevent":[43],"legal":[44],"issues.":[45],"With":[46],"this":[47],"article,":[48],"we":[49,63,95],"first":[50],"introduce":[51],"new":[53],"language-agnostic":[54],"approach":[55,78],"code.":[61],"Then,":[62],"discuss":[64],"limitations":[65],"existing":[67,105],"synthetic":[68],"datasets":[69,81,106],"attribution,":[72],"propose":[74],"data":[76],"collection":[77],"that":[79,82,97,130],"delivers":[80],"better":[83],"reflect":[84],"aspects":[85],"important":[86],"potential":[88],"practical":[89,138],"use":[90,139],"in":[91],"engineering.":[93,142],"Finally,":[94],"demonstrate":[96],"high":[98],"accuracy":[99],"models":[103,129],"on":[104,113],"drastically":[107],"drops":[108],"when":[109],"are":[111],"evaluated":[112],"more":[114],"realistic":[115],"data.":[116],"We":[117],"outline":[118],"next":[119],"steps":[120],"design":[123],"evaluation":[125],"bring":[132],"efforts":[135],"closer":[136]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
