{"id":"https://openalex.org/W2511803001","doi":"https://doi.org/10.1145/2970276.2970326","title":"Deep learning code fragments for code clone detection","display_name":"Deep learning code fragments for code clone detection","publication_year":2016,"publication_date":"2016-08-25","ids":{"openalex":"https://openalex.org/W2511803001","doi":"https://doi.org/10.1145/2970276.2970326","mag":"2511803001"},"language":"en","primary_location":{"id":"doi:10.1145/2970276.2970326","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2970276.2970326","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st IEEE/ACM International Conference on Automated Software Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052669359","display_name":"Martin White","orcid":"https://orcid.org/0000-0001-8686-2274"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Martin White","raw_affiliation_strings":["College of William and Mary, USA"],"affiliations":[{"raw_affiliation_string":"College of William and Mary, USA","institution_ids":["https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020154435","display_name":"Michele Tufano","orcid":"https://orcid.org/0000-0003-2225-2420"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michele Tufano","raw_affiliation_strings":["College of William and Mary, USA"],"affiliations":[{"raw_affiliation_string":"College of William and Mary, USA","institution_ids":["https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037563230","display_name":"Christopher Vendome","orcid":"https://orcid.org/0000-0002-4069-0558"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Vendome","raw_affiliation_strings":["College of William and Mary, USA"],"affiliations":[{"raw_affiliation_string":"College of William and Mary, USA","institution_ids":["https://openalex.org/I16285277"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041262116","display_name":"Denys Poshyvanyk","orcid":"https://orcid.org/0000-0002-5626-7586"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Denys Poshyvanyk","raw_affiliation_strings":["College of William and Mary, USA"],"affiliations":[{"raw_affiliation_string":"College of William and Mary, USA","institution_ids":["https://openalex.org/I16285277"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5052669359"],"corresponding_institution_ids":["https://openalex.org/I16285277"],"apc_list":null,"apc_paid":null,"fwci":108.779,"has_fulltext":false,"cited_by_count":563,"citation_normalized_percentile":{"value":0.9995723,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"87","last_page":"98"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9837999939918518,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8241163492202759},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.7347684502601624},{"id":"https://openalex.org/keywords/clone","display_name":"clone (Java method)","score":0.7178035974502563},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.7036178708076477},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.686116635799408},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.6642772555351257},{"id":"https://openalex.org/keywords/software-maintenance","display_name":"Software maintenance","score":0.6545652747154236},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6269950866699219},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5071160793304443},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4556754529476166},{"id":"https://openalex.org/keywords/program-comprehension","display_name":"Program comprehension","score":0.4372890889644623},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.423639178276062},{"id":"https://openalex.org/keywords/static-program-analysis","display_name":"Static program analysis","score":0.42080622911453247},{"id":"https://openalex.org/keywords/software-system","display_name":"Software system","score":0.4055119752883911},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3882845342159271},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3651696741580963},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3567860424518585},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.2634100914001465},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.15841108560562134}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8241163492202759},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.7347684502601624},{"id":"https://openalex.org/C81089528","wikidata":"https://www.wikidata.org/wiki/Q5134986","display_name":"clone (Java method)","level":3,"score":0.7178035974502563},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.7036178708076477},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.686116635799408},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.6642772555351257},{"id":"https://openalex.org/C101317890","wikidata":"https://www.wikidata.org/wiki/Q940053","display_name":"Software maintenance","level":4,"score":0.6545652747154236},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6269950866699219},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5071160793304443},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4556754529476166},{"id":"https://openalex.org/C2777561058","wikidata":"https://www.wikidata.org/wiki/Q2652119","display_name":"Program comprehension","level":4,"score":0.4372890889644623},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.423639178276062},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.42080622911453247},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.4055119752883911},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3882845342159271},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3651696741580963},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3567860424518585},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.2634100914001465},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.15841108560562134},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2970276.2970326","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2970276.2970326","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st IEEE/ACM International Conference on Automated Software Engineering","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310865","display_name":"University of Saskatchewan","ror":"https://ror.org/010x8gc63"},{"id":"https://openalex.org/F4320328656","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":129,"referenced_works":["https://openalex.org/W5897337","https://openalex.org/W36903255","https://openalex.org/W60493759","https://openalex.org/W71795751","https://openalex.org/W104184427","https://openalex.org/W179875071","https://openalex.org/W182831726","https://openalex.org/W196214544","https://openalex.org/W1480376833","https://openalex.org/W1500382227","https://openalex.org/W1502916507","https://openalex.org/W1506806321","https://openalex.org/W1507039213","https://openalex.org/W1512285202","https://openalex.org/W1519220202","https://openalex.org/W1562646198","https://openalex.org/W1564657277","https://openalex.org/W1570745693","https://openalex.org/W1593203335","https://openalex.org/W1673239900","https://openalex.org/W1806891645","https://openalex.org/W1857184435","https://openalex.org/W1964169783","https://openalex.org/W1965154800","https://openalex.org/W1970859802","https://openalex.org/W1972141422","https://openalex.org/W1973650376","https://openalex.org/W1974020522","https://openalex.org/W1983118588","https://openalex.org/W1992602600","https://openalex.org/W1993897559","https://openalex.org/W1993976072","https://openalex.org/W1997102498","https://openalex.org/W1998399571","https://openalex.org/W2008085811","https://openalex.org/W2010608861","https://openalex.org/W2018389835","https://openalex.org/W2019374535","https://openalex.org/W2037291277","https://openalex.org/W2049461910","https://openalex.org/W2050703362","https://openalex.org/W2054855378","https://openalex.org/W2056830820","https://openalex.org/W2060384944","https://openalex.org/W2065053490","https://openalex.org/W2072128103","https://openalex.org/W2074529754","https://openalex.org/W2077155146","https://openalex.org/W2088479623","https://openalex.org/W2090432523","https://openalex.org/W2090878800","https://openalex.org/W2091812280","https://openalex.org/W2093861610","https://openalex.org/W2096491586","https://openalex.org/W2100506586","https://openalex.org/W2101832700","https://openalex.org/W2104301886","https://openalex.org/W2104518905","https://openalex.org/W2104982710","https://openalex.org/W2107697055","https://openalex.org/W2109943392","https://openalex.org/W2113500796","https://openalex.org/W2113697305","https://openalex.org/W2116783278","https://openalex.org/W2118024368","https://openalex.org/W2119109877","https://openalex.org/W2120319185","https://openalex.org/W2120322286","https://openalex.org/W2120861206","https://openalex.org/W2122334745","https://openalex.org/W2125260159","https://openalex.org/W2126793110","https://openalex.org/W2128698639","https://openalex.org/W2128782367","https://openalex.org/W2129386590","https://openalex.org/W2131477050","https://openalex.org/W2136099030","https://openalex.org/W2138452145","https://openalex.org/W2138756793","https://openalex.org/W2140609933","https://openalex.org/W2142403498","https://openalex.org/W2143151143","https://openalex.org/W2143861926","https://openalex.org/W2143960295","https://openalex.org/W2144344516","https://openalex.org/W2144854572","https://openalex.org/W2147152072","https://openalex.org/W2150355110","https://openalex.org/W2153029781","https://openalex.org/W2157532207","https://openalex.org/W2158439356","https://openalex.org/W2159505618","https://openalex.org/W2162739315","https://openalex.org/W2164030845","https://openalex.org/W2164233915","https://openalex.org/W2164403446","https://openalex.org/W2165747537","https://openalex.org/W2166278331","https://openalex.org/W2171293873","https://openalex.org/W2171928131","https://openalex.org/W2251939518","https://openalex.org/W2294554540","https://openalex.org/W2298313545","https://openalex.org/W2396976214","https://openalex.org/W2437096199","https://openalex.org/W2474824677","https://openalex.org/W2565701092","https://openalex.org/W2787894218","https://openalex.org/W2792339243","https://openalex.org/W2913932916","https://openalex.org/W2919115771","https://openalex.org/W2950075229","https://openalex.org/W2951324391","https://openalex.org/W2951650375","https://openalex.org/W2964335273","https://openalex.org/W3104921985","https://openalex.org/W3145128584","https://openalex.org/W4231109964","https://openalex.org/W4285719527","https://openalex.org/W6607467106","https://openalex.org/W6629325410","https://openalex.org/W6629824143","https://openalex.org/W6638167432","https://openalex.org/W6638928880","https://openalex.org/W6639497327","https://openalex.org/W6675140444","https://openalex.org/W6680532216","https://openalex.org/W6760385162","https://openalex.org/W7029321148"],"related_works":["https://openalex.org/W3008773848","https://openalex.org/W2066182606","https://openalex.org/W2136808032","https://openalex.org/W2883301371","https://openalex.org/W2460969868","https://openalex.org/W53653089","https://openalex.org/W2765394967","https://openalex.org/W4385485083","https://openalex.org/W2103701733","https://openalex.org/W1536501326"],"abstract_inverted_index":{"Code":[0],"clone":[1,98,152,193],"detection":[2,24,46,99,194],"is":[3,58,190],"an":[4],"important":[5],"problem":[6],"for":[7,50,74,96,192,199],"software":[8,110],"maintenance":[9],"and":[10,53,114,119,132,163,195],"evolution.":[11],"Many":[12],"approaches":[13],"consider":[14],"either":[15,173],"structure":[16],"or":[17,175],"identifiers,":[18],"but":[19],"none":[20],"of":[21,29,107,109,129],"the":[22,61,80,87,105,130,142,179],"existing":[23],"techniques":[25,32,47],"model":[26],"both":[27],"sources":[28],"information.":[30],"These":[31],"also":[33],"depend":[34],"on":[35,71],"generic,":[36],"handcrafted":[37],"features":[38],"to":[39,102,137,149,158],"represent":[40],"code":[41,57,64,97],"fragments.":[42],"We":[43,90,112,154],"introduce":[44],"learning-based":[45,94,167,188],"where":[48],"everything":[49],"representing":[51],"terms":[52],"fragments":[54],"in":[55],"source":[56],"mined":[59,78,85],"from":[60,104],"repository.":[62],"Our":[63,183],"analysis":[65],"supports":[66],"a":[67,159,196],"framework,":[68],"which":[69],"relies":[70],"deep":[72],"learning,":[73],"automatically":[75],"linking":[76],"patterns":[77,84],"at":[79,86],"lexical":[81],"level":[82],"with":[83,100],"syntactic":[88],"level.":[89],"evaluated":[91,116,136],"our":[92,156,166,187],"novel":[93],"approach":[95,157,168,189],"respect":[101],"feasibility":[103],"point":[106],"view":[108],"maintainers.":[111],"sampled":[113],"manually":[115],"398":[117],"file-":[118,131],"480":[120],"method-level":[121,133],"pairs":[122,147],"across":[123],"eight":[124],"real-world":[125],"Java":[126],"systems;":[127],"93%":[128],"samples":[134],"were":[135,172],"be":[138],"true":[139,143],"positives.":[140],"Among":[141],"positives,":[144],"we":[145],"found":[146,164],"mapping":[148],"all":[150],"four":[151],"types.":[153],"compared":[155],"traditional":[160],"structure-oriented":[161],"technique":[162,198],"that":[165,171,186],"detected":[169],"clones":[170],"undetected":[174],"suboptimally":[176],"reported":[177],"by":[178],"prominent":[180],"tool":[181],"Deckard.":[182],"results":[184],"affirm":[185],"suitable":[191],"tenable":[197],"researchers.":[200]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":34},{"year":2024,"cited_by_count":46},{"year":2023,"cited_by_count":69},{"year":2022,"cited_by_count":62},{"year":2021,"cited_by_count":88},{"year":2020,"cited_by_count":100},{"year":2019,"cited_by_count":86},{"year":2018,"cited_by_count":49},{"year":2017,"cited_by_count":21},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
