{"id":"https://openalex.org/W2461136558","doi":"https://doi.org/10.1109/icpc.2016.7503720","title":"Identifying functionally similar code in complex codebases","display_name":"Identifying functionally similar code in complex codebases","publication_year":2016,"publication_date":"2016-05-01","ids":{"openalex":"https://openalex.org/W2461136558","doi":"https://doi.org/10.1109/icpc.2016.7503720","mag":"2461136558"},"language":"en","primary_location":{"id":"doi:10.1109/icpc.2016.7503720","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpc.2016.7503720","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 24th International Conference on Program Comprehension (ICPC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.7916/d8bk1cv4","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006844232","display_name":"Fang-Hsiang Su","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Fang-Hsiang Su","raw_affiliation_strings":["Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053080959","display_name":"Jonathan Bell","orcid":"https://orcid.org/0000-0002-1187-9298"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonathan Bell","raw_affiliation_strings":["Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090171418","display_name":"Gail E. Kaiser","orcid":"https://orcid.org/0000-0002-8791-1178"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gail Kaiser","raw_affiliation_strings":["Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030436580","display_name":"Simha Sethumadhavan","orcid":"https://orcid.org/0000-0002-6180-7153"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Simha Sethumadhavan","raw_affiliation_strings":["Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5006844232"],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":11.0814,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.98182928,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code-refactoring","display_name":"Code refactoring","score":0.9016064405441284},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.863448977470398},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.6027372479438782},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5953137874603271},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5242547988891602},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5077391862869263},{"id":"https://openalex.org/keywords/software-maintenance","display_name":"Software maintenance","score":0.47562816739082336},{"id":"https://openalex.org/keywords/source-lines-of-code","display_name":"Source lines of code","score":0.468046635389328},{"id":"https://openalex.org/keywords/clone","display_name":"clone (Java method)","score":0.46666425466537476},{"id":"https://openalex.org/keywords/program-comprehension","display_name":"Program comprehension","score":0.4655640125274658},{"id":"https://openalex.org/keywords/software-system","display_name":"Software system","score":0.4441082179546356},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.43736183643341064},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.40061748027801514},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.07632121443748474}],"concepts":[{"id":"https://openalex.org/C152752567","wikidata":"https://www.wikidata.org/wiki/Q116877","display_name":"Code refactoring","level":3,"score":0.9016064405441284},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.863448977470398},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.6027372479438782},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5953137874603271},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5242547988891602},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5077391862869263},{"id":"https://openalex.org/C101317890","wikidata":"https://www.wikidata.org/wiki/Q940053","display_name":"Software maintenance","level":4,"score":0.47562816739082336},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.468046635389328},{"id":"https://openalex.org/C81089528","wikidata":"https://www.wikidata.org/wiki/Q5134986","display_name":"clone (Java method)","level":3,"score":0.46666425466537476},{"id":"https://openalex.org/C2777561058","wikidata":"https://www.wikidata.org/wiki/Q2652119","display_name":"Program comprehension","level":4,"score":0.4655640125274658},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.4441082179546356},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.43736183643341064},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.40061748027801514},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.07632121443748474},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icpc.2016.7503720","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpc.2016.7503720","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 24th International Conference on Program Comprehension (ICPC)","raw_type":"proceedings-article"},{"id":"pmh:oai:academiccommons.columbia.edu:10.7916/D8BK1CV4","is_oa":false,"landing_page_url":"https://doi.org/10.7916/D8BK1CV4","pdf_url":null,"source":{"id":"https://openalex.org/S4306402601","display_name":"Columbia Academic Commons (Columbia University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78577930","host_organization_name":"Columbia University","host_organization_lineage":["https://openalex.org/I78577930"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Reports"},{"id":"doi:10.7916/d8bk1cv4","is_oa":true,"landing_page_url":"https://doi.org/10.7916/d8bk1cv4","pdf_url":null,"source":{"id":"https://openalex.org/S4306402601","display_name":"Columbia Academic Commons (Columbia University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78577930","host_organization_name":"Columbia University","host_organization_lineage":["https://openalex.org/I78577930"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.7916/d8bk1cv4","is_oa":true,"landing_page_url":"https://doi.org/10.7916/d8bk1cv4","pdf_url":null,"source":{"id":"https://openalex.org/S4306402601","display_name":"Columbia Academic Commons (Columbia University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78577930","host_organization_name":"Columbia University","host_organization_lineage":["https://openalex.org/I78577930"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1533075034","display_name":null,"funder_award_id":"CCF-1302269","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5402090779","display_name":null,"funder_award_id":"F-130","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6559061274","display_name":null,"funder_award_id":"1302269","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7359969683","display_name":"SHF: MEDIUM: Achieving Software Reliability without True Test Oracles","funder_award_id":"1161079","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W596492259","https://openalex.org/W1514258760","https://openalex.org/W1567790484","https://openalex.org/W1965194038","https://openalex.org/W1989790414","https://openalex.org/W2017212970","https://openalex.org/W2027450054","https://openalex.org/W2040923543","https://openalex.org/W2065053490","https://openalex.org/W2074529754","https://openalex.org/W2077720983","https://openalex.org/W2086320398","https://openalex.org/W2096491586","https://openalex.org/W2101832700","https://openalex.org/W2102748361","https://openalex.org/W2107697055","https://openalex.org/W2120322286","https://openalex.org/W2121669067","https://openalex.org/W2125260159","https://openalex.org/W2125980283","https://openalex.org/W2128782367","https://openalex.org/W2130169574","https://openalex.org/W2136099030","https://openalex.org/W2138756793","https://openalex.org/W2141535436","https://openalex.org/W2151573418","https://openalex.org/W2157532207","https://openalex.org/W2160140538","https://openalex.org/W2164233915","https://openalex.org/W3104227690","https://openalex.org/W3142518606","https://openalex.org/W3144154882","https://openalex.org/W3147107715","https://openalex.org/W4247224463","https://openalex.org/W6617952372","https://openalex.org/W6633995546"],"related_works":["https://openalex.org/W4286331421","https://openalex.org/W2069073539","https://openalex.org/W2765394967","https://openalex.org/W2010680342","https://openalex.org/W1985559452","https://openalex.org/W3033357505","https://openalex.org/W2186150085","https://openalex.org/W1938356314","https://openalex.org/W2406602165","https://openalex.org/W2367846948"],"abstract_inverted_index":{"Identifying":[0],"similar":[1],"code":[2,25,34,43],"in":[3,48,60,79,118,130],"software":[4,9,17],"systems":[5],"can":[6],"assist":[7],"many":[8],"engineering":[10],"tasks":[11],"such":[12,128],"as":[13],"program":[14],"understanding":[15],"and":[16,62,84,88,101,112,138],"refactoring.":[18],"While":[19],"most":[20],"approaches":[21],"focus":[22],"on":[23,109],"identifying":[24,83],"that":[26,35,44,75,145],"looks":[27],"alike,":[28],"some":[29],"techniques":[30],"aim":[31],"at":[32],"detecting":[33],"functions":[36,45],"alike.":[37],"Detecting":[38],"these":[39],"functional":[40,77,104,151],"clones":[41,78,152],"\u2014":[42,47],"alike":[46],"object":[49,119],"oriented":[50,120],"languages":[51,121],"remains":[52],"an":[53],"open":[54,136],"question":[55],"because":[56],"of":[57,156,163],"the":[58,116,164],"difficulty":[59],"exposing":[61],"comparing":[63],"programs'":[64],"functionality":[65],"effectively.":[66],"We":[67,126],"propose":[68],"a":[69,154,160],"novel":[70],"technique,":[71],"In-Vivo":[72],"Clone":[73],"Detection,":[74],"detects":[76,147],"arbitrary":[80],"programs":[81,100,107],"by":[82,123],"mining":[85],"their":[86,110],"inputs":[87,111],"outputs.":[89],"The":[90],"key":[91],"insight":[92],"is":[93,135],"to":[94,98],"use":[95],"existing":[96],"workloads":[97],"execute":[99],"then":[102],"measure":[103],"similarities":[105],"between":[106],"based":[108],"outputs,":[113],"which":[114,134],"mitigates":[115],"problems":[117],"reported":[122],"prior":[124],"work.":[125],"implement":[127],"technique":[129],"our":[131],"system,":[132],"HitoshiIO,":[133],"source":[137],"freely":[139],"available.":[140],"Our":[141],"experimental":[142],"results":[143],"show":[144],"HitoshiIO":[146,167],"more":[148],"than":[149],"800":[150],"across":[153],"corpus":[155],"118":[157],"projects.":[158],"In":[159],"random":[161],"sample":[162],"detected":[165],"clones,":[166],"achieves":[168],"68+%":[169],"true":[170],"positive":[171,177],"rate":[172],"with":[173],"only":[174],"15%":[175],"false":[176],"rate.":[178]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
