{"id":"https://openalex.org/W3167339541","doi":"https://doi.org/10.1145/3472674.3473979","title":"Unsupervised learning of general-purpose embeddings for code changes","display_name":"Unsupervised learning of general-purpose embeddings for code changes","publication_year":2021,"publication_date":"2021-08-19","ids":{"openalex":"https://openalex.org/W3167339541","doi":"https://doi.org/10.1145/3472674.3473979","mag":"3167339541"},"language":"en","primary_location":{"id":"doi:10.1145/3472674.3473979","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3472674.3473979","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th International Workshop on Machine Learning Techniques for Software Quality Evolution","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.02087","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059324436","display_name":"Mikhail Pravilov","orcid":"https://orcid.org/0000-0002-6376-6631"},"institutions":[{"id":"https://openalex.org/I118501908","display_name":"National Research University Higher School of Economics","ror":"https://ror.org/055f7t516","country_code":"RU","type":"education","lineage":["https://openalex.org/I118501908"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Mikhail Pravilov","raw_affiliation_strings":["HSE University, Russia","HSE University,Russia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"HSE University, Russia","institution_ids":["https://openalex.org/I118501908"]},{"raw_affiliation_string":"HSE University,Russia","institution_ids":["https://openalex.org/I118501908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088461870","display_name":"Egor Bogomolov","orcid":"https://orcid.org/0000-0002-3499-2402"},"institutions":[{"id":"https://openalex.org/I118501908","display_name":"National Research University Higher School of Economics","ror":"https://ror.org/055f7t516","country_code":"RU","type":"education","lineage":["https://openalex.org/I118501908"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Egor Bogomolov","raw_affiliation_strings":["JetBrains Research, Russia / HSE University, Russia"],"raw_orcid":"https://orcid.org/0000-0002-3499-2402","affiliations":[{"raw_affiliation_string":"JetBrains Research, Russia / HSE University, Russia","institution_ids":["https://openalex.org/I118501908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013316832","display_name":"Yaroslav Golubev","orcid":"https://orcid.org/0000-0001-7009-635X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yaroslav Golubev","raw_affiliation_strings":["JetBrains Research, Russia"],"raw_orcid":"https://orcid.org/0000-0001-7009-635X","affiliations":[{"raw_affiliation_string":"JetBrains Research, Russia","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065316896","display_name":"Timofey Bryksin","orcid":"https://orcid.org/0000-0001-9022-3563"},"institutions":[{"id":"https://openalex.org/I118501908","display_name":"National Research University Higher School of Economics","ror":"https://ror.org/055f7t516","country_code":"RU","type":"education","lineage":["https://openalex.org/I118501908"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Timofey Bryksin","raw_affiliation_strings":["JetBrains Research, Russia / HSE University, Russia"],"raw_orcid":"https://orcid.org/0000-0001-9022-3563","affiliations":[{"raw_affiliation_string":"JetBrains Research, Russia / HSE University, Russia","institution_ids":["https://openalex.org/I118501908"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2845,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60466068,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"7","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/commit","display_name":"Commit","score":0.8360710144042969},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.806580662727356},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.7552555799484253},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7139601111412048},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6180742383003235},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.615608811378479},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5510606169700623},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.547883152961731},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4848533868789673},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16148650646209717}],"concepts":[{"id":"https://openalex.org/C153180980","wikidata":"https://www.wikidata.org/wiki/Q19776675","display_name":"Commit","level":2,"score":0.8360710144042969},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.806580662727356},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.7552555799484253},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7139601111412048},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6180742383003235},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.615608811378479},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5510606169700623},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.547883152961731},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4848533868789673},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16148650646209717},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3472674.3473979","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3472674.3473979","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th International Workshop on Machine Learning Techniques for Software Quality Evolution","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2106.02087","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.02087","pdf_url":"https://arxiv.org/pdf/2106.02087","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3167339541","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2106.02087","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2106.02087","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.02087","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2106.02087","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.02087","pdf_url":"https://arxiv.org/pdf/2106.02087","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3167339541.pdf","grobid_xml":"https://content.openalex.org/works/W3167339541.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W1647671624","https://openalex.org/W2057049321","https://openalex.org/W2064675550","https://openalex.org/W2101105183","https://openalex.org/W2137659640","https://openalex.org/W2164948578","https://openalex.org/W2274071363","https://openalex.org/W2402619042","https://openalex.org/W2507756961","https://openalex.org/W2888312537","https://openalex.org/W2888328667","https://openalex.org/W2898951026","https://openalex.org/W2942293597","https://openalex.org/W2954823997","https://openalex.org/W2955654168","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2964308564","https://openalex.org/W2964322208","https://openalex.org/W2987083812","https://openalex.org/W2990317905","https://openalex.org/W2991579743","https://openalex.org/W2995333547","https://openalex.org/W3105867435","https://openalex.org/W3138597995"],"related_works":["https://openalex.org/W3195402592","https://openalex.org/W3112383693","https://openalex.org/W3111914164","https://openalex.org/W3127853158","https://openalex.org/W2991579743","https://openalex.org/W3161465675","https://openalex.org/W2927913379","https://openalex.org/W3040956736","https://openalex.org/W3011632945","https://openalex.org/W3102782538","https://openalex.org/W2968179027","https://openalex.org/W3015672537","https://openalex.org/W2921999611","https://openalex.org/W3098310070","https://openalex.org/W3159559013","https://openalex.org/W3086457636","https://openalex.org/W3166396011","https://openalex.org/W2572406310","https://openalex.org/W3107798002","https://openalex.org/W2982115308"],"abstract_inverted_index":{"Applying":[0],"machine":[1],"learning":[2],"to":[3,38,49],"tasks":[4,34],"that":[5,110],"operate":[6],"with":[7],"code":[8,39,53,63,75,114,133],"changes":[9,37,64,115],"requires":[10,61],"their":[11],"numerical":[12],"representation.":[13],"In":[14,70],"this":[15,105],"work,":[16],"we":[17],"propose":[18],"an":[19],"approach":[20],"for":[21,89,104],"obtaining":[22],"such":[23],"representations":[24],"during":[25],"pre-training":[26,125],"and":[27,40,117],"evaluate":[28],"them":[29],"on":[30,126],"two":[31],"different":[32],"downstream":[33],"\u2014":[35],"applying":[36,74],"commit":[41,91],"message":[42,92],"generation.":[43],"During":[44],"pre-training,":[45],"the":[46,51,71,90,97,122],"model":[47,78,95],"learns":[48],"apply":[50],"given":[52],"change":[54],"in":[55,86,121],"a":[56,127],"correct":[57],"way.":[58],"This":[59],"task":[60,72],"only":[62],"themselves,":[65],"which":[66,108],"makes":[67],"it":[68,111],"unsupervised.":[69],"of":[73,130],"changes,":[76],"our":[77,94],"outperforms":[79],"baseline":[80],"models":[81,102],"by":[82,124],"5.9":[83],"percentage":[84],"points":[85],"accuracy.":[87],"As":[88],"generation,":[93],"demonstrated":[96],"same":[98],"results":[99],"as":[100],"supervised":[101],"trained":[103],"specific":[106],"task,":[107],"indicates":[109],"can":[112,118],"encode":[113],"well":[116],"be":[119],"improved":[120],"future":[123],"larger":[128],"dataset":[129],"easily":[131],"gathered":[132],"changes.":[134]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
