{"id":"https://openalex.org/W3013762537","doi":"https://doi.org/10.1109/iwsc50091.2020.9047640","title":"Clone Detection on Large Scala Codebases","display_name":"Clone Detection on Large Scala Codebases","publication_year":2020,"publication_date":"2020-02-01","ids":{"openalex":"https://openalex.org/W3013762537","doi":"https://doi.org/10.1109/iwsc50091.2020.9047640","mag":"3013762537"},"language":"en","primary_location":{"id":"doi:10.1109/iwsc50091.2020.9047640","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwsc50091.2020.9047640","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE 14th International Workshop on Software Clones (IWSC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038270074","display_name":"Wahidur Rahman","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Wahidur Rahman","raw_affiliation_strings":["Imperial College London, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Imperial College London, London, United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008538061","display_name":"Yisen Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yisen Xu","raw_affiliation_strings":["Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113218205","display_name":"Fan Pu","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Pu","raw_affiliation_strings":["Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002360773","display_name":"Jifeng Xuan","orcid":"https://orcid.org/0000-0002-2968-3496"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jifeng Xuan","raw_affiliation_strings":["Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019942607","display_name":"Xiangyang Jia","orcid":"https://orcid.org/0000-0002-7449-1055"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyang Jia","raw_affiliation_strings":["Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085080130","display_name":"Michail Basios","orcid":null},"institutions":[{"id":"https://openalex.org/I125680101","display_name":"Turing Institute","ror":"https://ror.org/02x2mw849","country_code":"GB","type":"facility","lineage":["https://openalex.org/I125680101"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Michail Basios","raw_affiliation_strings":["Turing Intelligence Technology, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Turing Intelligence Technology, London, United Kingdom","institution_ids":["https://openalex.org/I125680101"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087209682","display_name":"Leslie Kanthan","orcid":null},"institutions":[{"id":"https://openalex.org/I125680101","display_name":"Turing Institute","ror":"https://ror.org/02x2mw849","country_code":"GB","type":"facility","lineage":["https://openalex.org/I125680101"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Leslie Kanthan","raw_affiliation_strings":["Turing Intelligence Technology, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Turing Intelligence Technology, London, United Kingdom","institution_ids":["https://openalex.org/I125680101"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101693627","display_name":"Lingbo Li","orcid":"https://orcid.org/0000-0002-3516-051X"},"institutions":[{"id":"https://openalex.org/I125680101","display_name":"Turing Institute","ror":"https://ror.org/02x2mw849","country_code":"GB","type":"facility","lineage":["https://openalex.org/I125680101"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Lingbo Li","raw_affiliation_strings":["Turing Intelligence Technology, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Turing Intelligence Technology, London, United Kingdom","institution_ids":["https://openalex.org/I125680101"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023461095","display_name":"Fan Wu","orcid":"https://orcid.org/0000-0002-3734-7855"},"institutions":[{"id":"https://openalex.org/I125680101","display_name":"Turing Institute","ror":"https://ror.org/02x2mw849","country_code":"GB","type":"facility","lineage":["https://openalex.org/I125680101"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Fan Wu","raw_affiliation_strings":["Turing Intelligence Technology, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Turing Intelligence Technology, London, United Kingdom","institution_ids":["https://openalex.org/I125680101"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100331400","display_name":"Baowen Xu","orcid":"https://orcid.org/0000-0001-7743-1296"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baowen Xu","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5038270074"],"corresponding_institution_ids":["https://openalex.org/I47508984"],"apc_list":null,"apc_paid":null,"fwci":1.3906,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.86126719,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"38","last_page":"44"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7967560291290283},{"id":"https://openalex.org/keywords/clone","display_name":"clone (Java method)","score":0.6759541034698486},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5868111848831177},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5299409627914429},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5197288990020752},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.5116472840309143},{"id":"https://openalex.org/keywords/scala","display_name":"Scala","score":0.506517767906189},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.4654558300971985},{"id":"https://openalex.org/keywords/software-maintenance","display_name":"Software maintenance","score":0.41879162192344666},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.4174240827560425},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.41243934631347656},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.32062995433807373},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.27294087409973145},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.23036572337150574},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.22985461354255676}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7967560291290283},{"id":"https://openalex.org/C81089528","wikidata":"https://www.wikidata.org/wiki/Q5134986","display_name":"clone (Java method)","level":3,"score":0.6759541034698486},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5868111848831177},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5299409627914429},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5197288990020752},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.5116472840309143},{"id":"https://openalex.org/C109701466","wikidata":"https://www.wikidata.org/wiki/Q460584","display_name":"Scala","level":3,"score":0.506517767906189},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.4654558300971985},{"id":"https://openalex.org/C101317890","wikidata":"https://www.wikidata.org/wiki/Q940053","display_name":"Software maintenance","level":4,"score":0.41879162192344666},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4174240827560425},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.41243934631347656},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.32062995433807373},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.27294087409973145},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.23036572337150574},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.22985461354255676},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iwsc50091.2020.9047640","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwsc50091.2020.9047640","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE 14th International Workshop on Software Clones (IWSC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W71795751","https://openalex.org/W1593203335","https://openalex.org/W1971959540","https://openalex.org/W1983316175","https://openalex.org/W2025962632","https://openalex.org/W2041190309","https://openalex.org/W2084627971","https://openalex.org/W2101832700","https://openalex.org/W2119887272","https://openalex.org/W2128698639","https://openalex.org/W2128782367","https://openalex.org/W2128888088","https://openalex.org/W2138756793","https://openalex.org/W2142608175","https://openalex.org/W2143960295","https://openalex.org/W2152089398","https://openalex.org/W2158439356","https://openalex.org/W2164030845","https://openalex.org/W2166278331","https://openalex.org/W2286236884","https://openalex.org/W2334699878","https://openalex.org/W2413171555","https://openalex.org/W2511803001","https://openalex.org/W2578208870","https://openalex.org/W2624813485","https://openalex.org/W2728599219","https://openalex.org/W2748075515","https://openalex.org/W2766078762","https://openalex.org/W2789396442","https://openalex.org/W2801712578","https://openalex.org/W2807866521","https://openalex.org/W2883359218","https://openalex.org/W3083016217","https://openalex.org/W3100025638","https://openalex.org/W3104103145","https://openalex.org/W3105535951","https://openalex.org/W4301168982","https://openalex.org/W6602989467","https://openalex.org/W6635419185","https://openalex.org/W6679426468","https://openalex.org/W6683154609","https://openalex.org/W6751071065","https://openalex.org/W6753527320"],"related_works":["https://openalex.org/W2113128227","https://openalex.org/W632256878","https://openalex.org/W4211197663","https://openalex.org/W2491403535","https://openalex.org/W3123068371","https://openalex.org/W2479811461","https://openalex.org/W2104915799","https://openalex.org/W2355429491","https://openalex.org/W2142991486","https://openalex.org/W2294829769"],"abstract_inverted_index":{"Code":[0],"clones":[1,13,150],"are":[2,64,146],"identical":[3],"or":[4],"similar":[5],"code":[6,12,35,82],"segments.":[7],"The":[8,26],"wide":[9],"existence":[10],"of":[11,18,24,42,79,135],"can":[14],"increase":[15,126],"the":[16,22,59,77,100,112,116,124,136,152,158],"cost":[17],"maintenance":[19],"and":[20,87,94,123],"jeopardise":[21],"quality":[23],"software.":[25],"research":[27,75],"community":[28],"has":[29],"developed":[30],"many":[31],"techniques":[32,45,63],"to":[33,57],"detect":[34],"clones,":[36],"however,":[37],"there":[38,145],"is":[39],"little":[40],"evidence":[41],"how":[43],"these":[44],"may":[46],"perform":[47,109],"in":[48,66,99,119,127,151,157],"industrial":[49,67,96,113,137],"use":[50,68],"cases.":[51,69],"In":[52],"this":[53],"paper,":[54],"we":[55,142],"aim":[56],"uncover":[58],"differences":[60],"when":[61],"such":[62],"applied":[65],"We":[70],"conducted":[71],"large":[72],"scale":[73],"experimental":[74],"on":[76,89,111],"performance":[78],"two":[80],"state-of-the-art":[81],"clone":[83],"detection":[84],"techniques,":[85],"SourcererCC":[86],"AutoenCODE,":[88],"both":[90,107],"open":[91,159],"source":[92,160],"projects":[93],"an":[95],"project":[97,138,154],"written":[98],"Scala":[101],"language.":[102],"Our":[103],"results":[104],"reveal":[105],"that":[106,144,156],"algorithms":[108],"differently":[110],"project,":[114],"with":[115],"largest":[117,125],"drop":[118],"precision":[120],"being":[121,129],"30.7%,":[122],"recall":[128],"32.4%.":[130],"By":[131],"manually":[132],"labelling":[133],"samples":[134],"by":[139],"its":[140],"developers,":[141],"discovered":[143],"substantially":[147],"less":[148],"Type-3":[149],"aforementioned":[153],"than":[155],"projects.":[161]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
