{"id":"https://openalex.org/W2320803981","doi":"https://doi.org/10.1109/icawst.2014.6981824","title":"One pass preprocessing for token-based source code clone detection","display_name":"One pass preprocessing for token-based source code clone detection","publication_year":2014,"publication_date":"2014-10-01","ids":{"openalex":"https://openalex.org/W2320803981","doi":"https://doi.org/10.1109/icawst.2014.6981824","mag":"2320803981"},"language":"en","primary_location":{"id":"doi:10.1109/icawst.2014.6981824","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icawst.2014.6981824","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE 6th International Conference on Awareness Science and Technology (iCAST)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053973508","display_name":"Dingkun Li","orcid":"https://orcid.org/0000-0002-4297-0844"},"institutions":[{"id":"https://openalex.org/I163753206","display_name":"Chungbuk National University","ror":"https://ror.org/02wnxgj78","country_code":"KR","type":"education","lineage":["https://openalex.org/I163753206"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Dingkun Li","raw_affiliation_strings":["Database/Bioinformatics Lab, Chungbuk National University, Cheongju, Sourth Korea"],"affiliations":[{"raw_affiliation_string":"Database/Bioinformatics Lab, Chungbuk National University, Cheongju, Sourth Korea","institution_ids":["https://openalex.org/I163753206"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055838198","display_name":"Minghao Piao","orcid":"https://orcid.org/0000-0001-7348-0752"},"institutions":[{"id":"https://openalex.org/I163753206","display_name":"Chungbuk National University","ror":"https://ror.org/02wnxgj78","country_code":"KR","type":"education","lineage":["https://openalex.org/I163753206"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Minghao Piao","raw_affiliation_strings":["Database/Bioinformatics Lab, Chungbuk National University, Cheongju, Sourth Korea"],"affiliations":[{"raw_affiliation_string":"Database/Bioinformatics Lab, Chungbuk National University, Cheongju, Sourth Korea","institution_ids":["https://openalex.org/I163753206"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057942974","display_name":"Ho Sun Shon","orcid":"https://orcid.org/0000-0002-6717-7869"},"institutions":[{"id":"https://openalex.org/I163753206","display_name":"Chungbuk National University","ror":"https://ror.org/02wnxgj78","country_code":"KR","type":"education","lineage":["https://openalex.org/I163753206"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Ho Sun Shon","raw_affiliation_strings":["Database/Bioinformatics Lab, Chungbuk National University, Cheongju, Sourth Korea"],"affiliations":[{"raw_affiliation_string":"Database/Bioinformatics Lab, Chungbuk National University, Cheongju, Sourth Korea","institution_ids":["https://openalex.org/I163753206"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043932631","display_name":"Keun Ho Ryu","orcid":"https://orcid.org/0000-0003-0394-9054"},"institutions":[{"id":"https://openalex.org/I141591182","display_name":"University of Aizu","ror":"https://ror.org/02pg0e883","country_code":"JP","type":"education","lineage":["https://openalex.org/I141591182"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keun Ho Ryu","raw_affiliation_strings":["Computer Industry LAB, Aizu University, Aizu-Wakamatsu, Japan"],"affiliations":[{"raw_affiliation_string":"Computer Industry LAB, Aizu University, Aizu-Wakamatsu, Japan","institution_ids":["https://openalex.org/I141591182"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071160608","display_name":"Incheon Paik","orcid":"https://orcid.org/0000-0002-7554-8180"},"institutions":[{"id":"https://openalex.org/I163753206","display_name":"Chungbuk National University","ror":"https://ror.org/02wnxgj78","country_code":"KR","type":"education","lineage":["https://openalex.org/I163753206"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Incheon Paik","raw_affiliation_strings":["Database/Bioinformatics Lab, Chungbuk National University, Cheongju, Sourth Korea"],"affiliations":[{"raw_affiliation_string":"Database/Bioinformatics Lab, Chungbuk National University, Cheongju, Sourth Korea","institution_ids":["https://openalex.org/I163753206"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5053973508"],"corresponding_institution_ids":["https://openalex.org/I163753206"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.29296316,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8170691132545471},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.7857239246368408},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.7495238184928894},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5644407868385315},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5480426549911499},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.5455918908119202},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.5236836671829224},{"id":"https://openalex.org/keywords/redundant-code","display_name":"Redundant code","score":0.4867699146270752},{"id":"https://openalex.org/keywords/clone","display_name":"clone (Java method)","score":0.4223913252353668},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4162508249282837},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.38379037380218506},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.352449893951416},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.3201141357421875},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.2826496362686157},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.10721412301063538}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8170691132545471},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.7857239246368408},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.7495238184928894},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5644407868385315},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5480426549911499},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.5455918908119202},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.5236836671829224},{"id":"https://openalex.org/C151578736","wikidata":"https://www.wikidata.org/wiki/Q1251793","display_name":"Redundant code","level":4,"score":0.4867699146270752},{"id":"https://openalex.org/C81089528","wikidata":"https://www.wikidata.org/wiki/Q5134986","display_name":"clone (Java method)","level":3,"score":0.4223913252353668},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4162508249282837},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.38379037380218506},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.352449893951416},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3201141357421875},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.2826496362686157},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.10721412301063538},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icawst.2014.6981824","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icawst.2014.6981824","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE 6th International Conference on Awareness Science and Technology (iCAST)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6000000238418579,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1587010832","https://openalex.org/W2017198493","https://openalex.org/W2114210089","https://openalex.org/W2115846710","https://openalex.org/W2120326119","https://openalex.org/W2128737833","https://openalex.org/W2129476935","https://openalex.org/W2138756793","https://openalex.org/W2172244770","https://openalex.org/W3143312578","https://openalex.org/W3166095749","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2001919569","https://openalex.org/W2113128227","https://openalex.org/W632256878","https://openalex.org/W2491403535","https://openalex.org/W2240848044","https://openalex.org/W2479811461","https://openalex.org/W2104915799","https://openalex.org/W4311938462","https://openalex.org/W3162146186","https://openalex.org/W2763045826"],"abstract_inverted_index":{"Token-based":[0],"source":[1,12,44,71,92,105,131],"code":[2,13,45,72,93,132],"clones":[3],"detection":[4,21],"provides":[5],"a":[6,62],"promising":[7],"way":[8,64],"to":[9,65,89,128],"detect":[10],"the":[11,32,70,74,91,117,130],"duplication":[14],"and":[15,54,68,96,116,126,134],"re-dundancy.":[16],"While":[17],"preprocessing":[18],"of":[19,47,112],"clone":[20],"plays":[22],"an":[23],"important":[24],"role":[25],"in":[26],"KDD":[27],"for":[28,77],"further":[29],"processing":[30,42],"as":[31],"old":[33],"saying":[34],"goes:":[35],"well":[36],"begun":[37],"is":[38,51],"half":[39],"done.":[40],"However,":[41],"unstructured":[43],"files":[46,94,133],"large":[48,103],"software":[49],"systems":[50],"really":[52],"challenging":[53],"time":[55],"or":[56],"space":[57],"consuming.":[58],"This":[59],"paper":[60,99],"introduces":[61],"novel":[63],"clean,":[66],"tokenize":[67],"transform":[69],"into":[73],"appropriate":[75],"form":[76],"mining.":[78],"A":[79],"tool":[80,122],"called":[81],"OPP":[82],"(One":[83],"Pass":[84],"Preprocessor)":[85],"has":[86,123],"been":[87],"developed":[88],"preprocess":[90,129],"efficiently":[95],"flexibly.":[97],"The":[98],"experimented":[100],"on":[101],"three":[102],"open":[104],"projects":[106],"like":[107],"Wildfly1.02":[108],"Linux":[109],"core-3.6,":[110],"VTK":[111],"different":[113],"host":[114],"languages,":[115],"result":[118],"showed":[119],"that":[120],"our":[121],"great":[124],"power":[125],"flexibility":[127],"products":[135],"high":[136],"quality":[137],"output.":[138]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
