{"id":"https://openalex.org/W2883359218","doi":"https://doi.org/10.1145/3196398.3196431","title":"Deep learning similarities from different representations of source code","display_name":"Deep learning similarities from different representations of source code","publication_year":2018,"publication_date":"2018-05-28","ids":{"openalex":"https://openalex.org/W2883359218","doi":"https://doi.org/10.1145/3196398.3196431","mag":"2883359218"},"language":"en","primary_location":{"id":"doi:10.1145/3196398.3196431","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3196398.3196431","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3196398.3196431","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th International Conference on Mining Software Repositories","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3196398.3196431","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020154435","display_name":"Michele Tufano","orcid":"https://orcid.org/0000-0003-2225-2420"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Michele Tufano","raw_affiliation_strings":["College of William and Mary"],"affiliations":[{"raw_affiliation_string":"College of William and Mary","institution_ids":["https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104271099","display_name":"Cody Watson","orcid":null},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cody Watson","raw_affiliation_strings":["College of William and Mary"],"affiliations":[{"raw_affiliation_string":"College of William and Mary","institution_ids":["https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056526226","display_name":"Gabriele Bavota","orcid":"https://orcid.org/0000-0002-2216-3148"},"institutions":[{"id":"https://openalex.org/I57201433","display_name":"Universit\u00e0 della Svizzera italiana","ror":"https://ror.org/03c4atk17","country_code":"CH","type":"education","lineage":["https://openalex.org/I57201433"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Gabriele Bavota","raw_affiliation_strings":["Universit\u00e0 della Svizzera italiana (USI), Lugano, Switzerland"],"affiliations":[{"raw_affiliation_string":"Universit\u00e0 della Svizzera italiana (USI), Lugano, Switzerland","institution_ids":["https://openalex.org/I57201433"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025099559","display_name":"Massimiliano Di Penta","orcid":"https://orcid.org/0000-0002-0340-9747"},"institutions":[{"id":"https://openalex.org/I16337185","display_name":"University of Sannio","ror":"https://ror.org/04vc81p87","country_code":"IT","type":"education","lineage":["https://openalex.org/I16337185"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Massimiliano Di Penta","raw_affiliation_strings":["University of Sannio, Benevento, Italy"],"affiliations":[{"raw_affiliation_string":"University of Sannio, Benevento, Italy","institution_ids":["https://openalex.org/I16337185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052669359","display_name":"Martin White","orcid":"https://orcid.org/0000-0001-8686-2274"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Martin White","raw_affiliation_strings":["College of William and Mary"],"affiliations":[{"raw_affiliation_string":"College of William and Mary","institution_ids":["https://openalex.org/I16285277"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041262116","display_name":"Denys Poshyvanyk","orcid":"https://orcid.org/0000-0002-5626-7586"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Denys Poshyvanyk","raw_affiliation_strings":["College of William and Mary"],"affiliations":[{"raw_affiliation_string":"College of William and Mary","institution_ids":["https://openalex.org/I16285277"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5020154435"],"corresponding_institution_ids":["https://openalex.org/I16285277"],"apc_list":null,"apc_paid":null,"fwci":29.2389,"has_fulltext":true,"cited_by_count":145,"citation_normalized_percentile":{"value":0.99563177,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"542","last_page":"553"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.972599983215332,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8319413661956787},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.7269597053527832},{"id":"https://openalex.org/keywords/code-refactoring","display_name":"Code refactoring","score":0.7241789102554321},{"id":"https://openalex.org/keywords/abstract-syntax-tree","display_name":"Abstract syntax tree","score":0.6617898941040039},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.6250383257865906},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.5590895414352417},{"id":"https://openalex.org/keywords/abstract-syntax","display_name":"Abstract syntax","score":0.5366823673248291},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5165649652481079},{"id":"https://openalex.org/keywords/static-program-analysis","display_name":"Static program analysis","score":0.4998207092285156},{"id":"https://openalex.org/keywords/program-comprehension","display_name":"Program comprehension","score":0.49109745025634766},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.462015300989151},{"id":"https://openalex.org/keywords/bytecode","display_name":"Bytecode","score":0.45941007137298584},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4445438086986542},{"id":"https://openalex.org/keywords/syntax","display_name":"Syntax","score":0.4304009974002838},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.41886383295059204},{"id":"https://openalex.org/keywords/software-maintenance","display_name":"Software maintenance","score":0.4131418466567993},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3997398018836975},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.33415770530700684},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.22577598690986633},{"id":"https://openalex.org/keywords/software-system","display_name":"Software system","score":0.22309231758117676},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.2125394344329834}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8319413661956787},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.7269597053527832},{"id":"https://openalex.org/C152752567","wikidata":"https://www.wikidata.org/wiki/Q116877","display_name":"Code refactoring","level":3,"score":0.7241789102554321},{"id":"https://openalex.org/C58646249","wikidata":"https://www.wikidata.org/wiki/Q127380","display_name":"Abstract syntax tree","level":3,"score":0.6617898941040039},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.6250383257865906},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5590895414352417},{"id":"https://openalex.org/C114408938","wikidata":"https://www.wikidata.org/wiki/Q333373","display_name":"Abstract syntax","level":3,"score":0.5366823673248291},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5165649652481079},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.4998207092285156},{"id":"https://openalex.org/C2777561058","wikidata":"https://www.wikidata.org/wiki/Q2652119","display_name":"Program comprehension","level":4,"score":0.49109745025634766},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.462015300989151},{"id":"https://openalex.org/C2779818221","wikidata":"https://www.wikidata.org/wiki/Q837330","display_name":"Bytecode","level":3,"score":0.45941007137298584},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4445438086986542},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.4304009974002838},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41886383295059204},{"id":"https://openalex.org/C101317890","wikidata":"https://www.wikidata.org/wiki/Q940053","display_name":"Software maintenance","level":4,"score":0.4131418466567993},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3997398018836975},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.33415770530700684},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.22577598690986633},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.22309231758117676},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.2125394344329834},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3196398.3196431","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3196398.3196431","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3196398.3196431","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th International Conference on Mining Software Repositories","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3196398.3196431","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3196398.3196431","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3196398.3196431","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th International Conference on Mining Software Repositories","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6310427565","display_name":null,"funder_award_id":"CCF-1525902","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309355","display_name":"Commonwealth of Virginia","ror":null},{"id":"https://openalex.org/F4320320924","display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","ror":"https://ror.org/00yjd3n13"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2883359218.pdf","grobid_xml":"https://content.openalex.org/works/W2883359218.grobid-xml"},"referenced_works_count":75,"referenced_works":["https://openalex.org/W71795751","https://openalex.org/W1423339008","https://openalex.org/W1512285202","https://openalex.org/W1539495021","https://openalex.org/W1570745693","https://openalex.org/W1593203335","https://openalex.org/W1964169783","https://openalex.org/W1975790660","https://openalex.org/W1975879668","https://openalex.org/W1977657402","https://openalex.org/W1980057827","https://openalex.org/W1983159198","https://openalex.org/W1997784518","https://openalex.org/W2007818470","https://openalex.org/W2009977560","https://openalex.org/W2010608861","https://openalex.org/W2014116953","https://openalex.org/W2015861736","https://openalex.org/W2025791343","https://openalex.org/W2049461910","https://openalex.org/W2059215200","https://openalex.org/W2068160829","https://openalex.org/W2068382122","https://openalex.org/W2083878868","https://openalex.org/W2088479623","https://openalex.org/W2096491586","https://openalex.org/W2104301886","https://openalex.org/W2104518905","https://openalex.org/W2104982710","https://openalex.org/W2107697055","https://openalex.org/W2109943392","https://openalex.org/W2116272605","https://openalex.org/W2118024368","https://openalex.org/W2125260159","https://openalex.org/W2125980283","https://openalex.org/W2128698639","https://openalex.org/W2128782367","https://openalex.org/W2131477050","https://openalex.org/W2138756793","https://openalex.org/W2145700761","https://openalex.org/W2147296306","https://openalex.org/W2157532207","https://openalex.org/W2158439356","https://openalex.org/W2163922914","https://openalex.org/W2164233915","https://openalex.org/W2165739648","https://openalex.org/W2166278331","https://openalex.org/W2249980257","https://openalex.org/W2251939518","https://openalex.org/W2286236884","https://openalex.org/W2360967250","https://openalex.org/W2387462954","https://openalex.org/W2402619042","https://openalex.org/W2407167388","https://openalex.org/W2470924201","https://openalex.org/W2495601248","https://openalex.org/W2511803001","https://openalex.org/W2575109289","https://openalex.org/W2593675739","https://openalex.org/W2605202003","https://openalex.org/W2612872092","https://openalex.org/W2620900956","https://openalex.org/W2729710884","https://openalex.org/W2740130862","https://openalex.org/W2741705590","https://openalex.org/W2751734926","https://openalex.org/W2911964244","https://openalex.org/W2913273467","https://openalex.org/W2914905043","https://openalex.org/W2950133940","https://openalex.org/W2962995178","https://openalex.org/W2963371736","https://openalex.org/W3099095494","https://openalex.org/W4247950230","https://openalex.org/W4301168982"],"related_works":["https://openalex.org/W2536864162","https://openalex.org/W3176913510","https://openalex.org/W3138016835","https://openalex.org/W2077104824","https://openalex.org/W1599055565","https://openalex.org/W2765394967","https://openalex.org/W3197333277","https://openalex.org/W1625533892","https://openalex.org/W2367115345","https://openalex.org/W2318302692"],"abstract_inverted_index":{"Assessing":[0],"the":[1,42,48,91,104,143],"similarity":[2,27,74],"between":[3],"code":[4,54,73,111,133,145,175],"components":[5],"plays":[6],"a":[7,11,59,76,85,137,149,168],"pivotal":[8],"role":[9],"in":[10,155],"number":[12],"of":[13,52,87,106,118,142,153],"Software":[14],"Engineering":[15],"(SE)":[16],"tasks,":[17],"such":[18],"as":[19],"clone":[20,107],"detection,":[21],"impact":[22],"analysis,":[23],"refactoring,":[24],"etc.":[25],"Code":[26],"is":[28],"generally":[29],"measured":[30],"by":[31,40],"relying":[32],"on":[33],"manually":[34],"defined":[35],"or":[36,46],"hand-crafted":[37],"features,":[38],"e.g.,":[39],"analyzing":[41],"overlap":[43],"among":[44],"identifiers":[45,88],"comparing":[47],"Abstract":[49,121],"Syntax":[50,122],"Trees":[51],"two":[53],"components.":[55],"These":[56],"features":[57],"represent":[58,90],"best":[60],"guess":[61],"at":[62,115],"what":[63],"SE":[64,163],"researchers":[65],"can":[66,97,112,135,165,172],"utilize":[67],"to":[68,89],"exploit":[69],"and":[70,127],"reliably":[71],"assess":[72],"for":[75,103],"given":[77],"task.":[78],"Recent":[79],"work":[80],"has":[81],"shown,":[82],"when":[83],"using":[84],"stream":[86],"code,":[92],"that":[93,131],"Deep":[94],"Learning":[95],"(DL)":[96],"effectively":[98],"replace":[99],"manual":[100],"feature":[101],"engineering":[102],"task":[105],"detection.":[108],"However,":[109],"source":[110],"be":[113],"represented":[114],"different":[116,178],"levels":[117],"abstraction:":[119],"identifiers,":[120],"Trees,":[123],"Control":[124],"Flow":[125],"Graphs,":[126],"Bytecode.":[128],"We":[129],"conjecture":[130],"each":[132],"representation":[134],"provide":[136],"different,":[138],"yet":[139],"orthogonal":[140],"view":[141],"same":[144],"fragment,":[146],"thus,":[147],"enabling":[148],"more":[150],"reliable":[151],"detection":[152],"similarities":[154,176],"code.":[156],"In":[157],"this":[158],"paper,":[159],"we":[160],"demonstrate":[161],"how":[162],"tasks":[164],"benefit":[166],"from":[167,177],"DL-based":[169],"approach,":[170],"which":[171],"automatically":[173],"learn":[174],"representations.":[179]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":18},{"year":2022,"cited_by_count":25},{"year":2021,"cited_by_count":29},{"year":2020,"cited_by_count":21},{"year":2019,"cited_by_count":18},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
