{"id":"https://openalex.org/W2810877720","doi":"https://doi.org/10.1109/fskd.2017.8393004","title":"Classification of file duplication by hierarchical clustering based on similarity relations","display_name":"Classification of file duplication by hierarchical clustering based on similarity relations","publication_year":2017,"publication_date":"2017-07-01","ids":{"openalex":"https://openalex.org/W2810877720","doi":"https://doi.org/10.1109/fskd.2017.8393004","mag":"2810877720"},"language":"en","primary_location":{"id":"doi:10.1109/fskd.2017.8393004","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fskd.2017.8393004","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 13th International Conference on Natural Computation, Fuzzy Systems and Knowledge Discovery (ICNC-FSKD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050917669","display_name":"Manop Phankokkruad","orcid":"https://orcid.org/0000-0001-9516-0901"},"institutions":[{"id":"https://openalex.org/I91538806","display_name":"King Mongkut's Institute of Technology Ladkrabang","ror":"https://ror.org/055mf0v62","country_code":"TH","type":"education","lineage":["https://openalex.org/I91538806"]}],"countries":["TH"],"is_corresponding":true,"raw_author_name":"Manop Phankokkruad","raw_affiliation_strings":["Faculty of Information Technology, King Mongkut's Institute of Technology Ladkrabang, Bangkok, Thailand"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, King Mongkut's Institute of Technology Ladkrabang, Bangkok, Thailand","institution_ids":["https://openalex.org/I91538806"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5050917669"],"corresponding_institution_ids":["https://openalex.org/I91538806"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.33137664,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1598","last_page":"1603"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7057774662971497},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5554516911506653},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5031539797782898},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.485070139169693},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.46965232491493225},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4179188311100006},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.41637828946113586},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34443050622940063},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.19636425375938416}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7057774662971497},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5554516911506653},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5031539797782898},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.485070139169693},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.46965232491493225},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4179188311100006},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.41637828946113586},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34443050622940063},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.19636425375938416},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fskd.2017.8393004","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fskd.2017.8393004","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 13th International Conference on Natural Computation, Fuzzy Systems and Knowledge Discovery (ICNC-FSKD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1512285202","https://openalex.org/W1609518033","https://openalex.org/W1698439592","https://openalex.org/W1937758960","https://openalex.org/W1977712153","https://openalex.org/W1997953464","https://openalex.org/W2016381774","https://openalex.org/W2049017883","https://openalex.org/W2049386694","https://openalex.org/W2087064593","https://openalex.org/W2097169398","https://openalex.org/W2100591395","https://openalex.org/W2108991785","https://openalex.org/W2123812100","https://openalex.org/W2128698639","https://openalex.org/W2128888088","https://openalex.org/W2129936293","https://openalex.org/W2156090204","https://openalex.org/W2182152528","https://openalex.org/W2528237000","https://openalex.org/W2599925391","https://openalex.org/W2616910264","https://openalex.org/W3146259567","https://openalex.org/W6630818679","https://openalex.org/W6636190696","https://openalex.org/W6678850886"],"related_works":["https://openalex.org/W3144870715","https://openalex.org/W3142319788","https://openalex.org/W2587188779","https://openalex.org/W3132870970","https://openalex.org/W2943088381","https://openalex.org/W4296125805","https://openalex.org/W4385804830","https://openalex.org/W2144348063","https://openalex.org/W2074021203","https://openalex.org/W3200375535"],"abstract_inverted_index":{"This":[0,20],"paper":[1],"have":[2],"proposed":[3],"the":[4,7,12,16,23,26,31,36,40,46,49,54,59,62,69,72,85,92,97,107,110,113,120,127,133,140],"classification":[5],"of":[6,18,28,39,61,77,109,126,135],"duplicate":[8],"file":[9],"by":[10,30,83],"measuring":[11],"similarity":[13,98],"score":[14],"between":[15,25,48,99,122],"couple":[17],"files.":[19,141],"work":[21,66,104,131],"examined":[22],"distance":[24,42],"pairwise":[27],"files":[29,55,100],"Smith-Waterman":[32,93],"algorithm.":[33],"In":[34],"addition,":[35],"make":[37],"use":[38],"Euclidean":[41],"matrix":[43],"could":[44,67,95,105],"identify":[45],"relativity":[47,108],"persons":[50],"who":[51,79,115],"often":[52],"copies":[53],"each":[56],"other.":[57],"Since":[58],"regularity":[60],"duplication":[63],"happens,":[64],"this":[65,103,130],"classify":[68],"proximity":[70],"to":[71],"persons,":[73,111],"and":[74,119],"a":[75],"group":[76],"person":[78,114,121,138],"positioned":[80,116],"closely":[81,117],"together":[82],"applying":[84],"hierarchical":[86],"clustering.":[87],"The":[88],"result":[89],"revealed":[90],"that":[91,137],"algorithms":[94],"measure":[96],"effectively.":[101],"Also,":[102],"analyze":[106],"classifies":[112],"together,":[118],"nearest":[123],"related":[124],"members":[125],"group.":[128],"Finally,":[129],"represented":[132],"amount":[134],"time":[136],"duplicated":[139]},"counts_by_year":[{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
