{"id":"https://openalex.org/W2475932436","doi":"https://doi.org/10.1109/jproc.2016.2571298","title":"A Comprehensive Study of the Past, Present, and Future of Data Deduplication","display_name":"A Comprehensive Study of the Past, Present, and Future of Data Deduplication","publication_year":2016,"publication_date":"2016-08-02","ids":{"openalex":"https://openalex.org/W2475932436","doi":"https://doi.org/10.1109/jproc.2016.2571298","mag":"2475932436"},"language":"en","primary_location":{"id":"doi:10.1109/jproc.2016.2571298","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jproc.2016.2571298","pdf_url":null,"source":{"id":"https://openalex.org/S68686220","display_name":"Proceedings of the IEEE","issn_l":"0018-9219","issn":["0018-9219","1558-2256"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the IEEE","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050928208","display_name":"Wen Xia","orcid":"https://orcid.org/0000-0003-4093-6391"},"institutions":[{"id":"https://openalex.org/I4210138186","display_name":"Wuhan National Laboratory for Optoelectronics","ror":"https://ror.org/03c9ncn37","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210138186"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Xia","raw_affiliation_strings":["School of Computer Science and Technology, Wuhan National Laboratory for Optoelectronics, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0003-4093-6391","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Wuhan National Laboratory for Optoelectronics, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I4210138186","https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054285749","display_name":"Hong Jiang","orcid":"https://orcid.org/0000-0002-1477-9751"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hong Jiang","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Texas at Arlington, Arlington, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Texas at Arlington, Arlington, TX, USA","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057421680","display_name":"Dan Feng","orcid":"https://orcid.org/0000-0002-4674-6006"},"institutions":[{"id":"https://openalex.org/I4210138186","display_name":"Wuhan National Laboratory for Optoelectronics","ror":"https://ror.org/03c9ncn37","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210138186"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dan Feng","raw_affiliation_strings":["Wuhan National Laboratory for Optoelectronics, the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan National Laboratory for Optoelectronics, the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I4210138186","https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069996857","display_name":"Fred Douglis","orcid":"https://orcid.org/0000-0003-2472-0339"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fred Douglis","raw_affiliation_strings":["EMC Corporation, Princeton, NJ, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"EMC Corporation, Princeton, NJ, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091766663","display_name":"Philip Shilane","orcid":"https://orcid.org/0000-0003-1235-0502"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Philip Shilane","raw_affiliation_strings":["EMC Corporation, Princeton, NJ, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"EMC Corporation, Princeton, NJ, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088998781","display_name":"Yu Hua","orcid":"https://orcid.org/0000-0001-7730-3796"},"institutions":[{"id":"https://openalex.org/I4210138186","display_name":"Wuhan National Laboratory for Optoelectronics","ror":"https://ror.org/03c9ncn37","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210138186"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Hua","raw_affiliation_strings":["Wuhan National Laboratory for Optoelectronics, the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan National Laboratory for Optoelectronics, the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I4210138186","https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101590323","display_name":"Min Fu","orcid":"https://orcid.org/0000-0002-8918-9414"},"institutions":[{"id":"https://openalex.org/I4210138186","display_name":"Wuhan National Laboratory for Optoelectronics","ror":"https://ror.org/03c9ncn37","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210138186"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Fu","raw_affiliation_strings":["Wuhan National Laboratory for Optoelectronics, the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan National Laboratory for Optoelectronics, the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I4210138186","https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100387027","display_name":"Yucheng Zhang","orcid":"https://orcid.org/0000-0001-7716-1214"},"institutions":[{"id":"https://openalex.org/I4210138186","display_name":"Wuhan National Laboratory for Optoelectronics","ror":"https://ror.org/03c9ncn37","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210138186"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yucheng Zhang","raw_affiliation_strings":["Wuhan National Laboratory for Optoelectronics, the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan National Laboratory for Optoelectronics, the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I4210138186","https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060665617","display_name":"Yukun Zhou","orcid":"https://orcid.org/0000-0003-0774-462X"},"institutions":[{"id":"https://openalex.org/I4210138186","display_name":"Wuhan National Laboratory for Optoelectronics","ror":"https://ror.org/03c9ncn37","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210138186"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yukun Zhou","raw_affiliation_strings":["Wuhan National Laboratory for Optoelectronics, the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan National Laboratory for Optoelectronics, the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I4210138186","https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":24.2258,"has_fulltext":false,"cited_by_count":308,"citation_normalized_percentile":{"value":0.99621539,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"104","issue":"9","first_page":"1681","last_page":"1710"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.9862653017044067},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8121270537376404},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5312141180038452},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.4670649468898773},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4637359380722046},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.44843924045562744},{"id":"https://openalex.org/keywords/backup","display_name":"Backup","score":0.4322417378425598},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.37461555004119873},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.14671266078948975}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.9862653017044067},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8121270537376404},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5312141180038452},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.4670649468898773},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4637359380722046},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.44843924045562744},{"id":"https://openalex.org/C2780945871","wikidata":"https://www.wikidata.org/wiki/Q194274","display_name":"Backup","level":2,"score":0.4322417378425598},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37461555004119873},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.14671266078948975}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jproc.2016.2571298","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jproc.2016.2571298","pdf_url":null,"source":{"id":"https://openalex.org/S68686220","display_name":"Proceedings of the IEEE","issn_l":"0018-9219","issn":["0018-9219","1558-2256"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the IEEE","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.6600000262260437}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":246,"referenced_works":["https://openalex.org/W5335935","https://openalex.org/W6847055","https://openalex.org/W22807665","https://openalex.org/W24740120","https://openalex.org/W26020480","https://openalex.org/W38366299","https://openalex.org/W66778775","https://openalex.org/W69510097","https://openalex.org/W89823361","https://openalex.org/W96989745","https://openalex.org/W120798314","https://openalex.org/W125480971","https://openalex.org/W126194223","https://openalex.org/W137089333","https://openalex.org/W152583591","https://openalex.org/W169466422","https://openalex.org/W174289249","https://openalex.org/W178328500","https://openalex.org/W182307532","https://openalex.org/W184194109","https://openalex.org/W200233886","https://openalex.org/W841409518","https://openalex.org/W939203190","https://openalex.org/W1418704970","https://openalex.org/W1437397736","https://openalex.org/W1445823765","https://openalex.org/W1474119323","https://openalex.org/W1490390347","https://openalex.org/W1519809459","https://openalex.org/W1521407587","https://openalex.org/W1521996498","https://openalex.org/W1541467180","https://openalex.org/W1542686980","https://openalex.org/W1545493325","https://openalex.org/W1547612978","https://openalex.org/W1547966682","https://openalex.org/W1553098517","https://openalex.org/W1566345534","https://openalex.org/W1568472162","https://openalex.org/W1576397915","https://openalex.org/W1608647889","https://openalex.org/W1609518033","https://openalex.org/W1614703486","https://openalex.org/W1626210897","https://openalex.org/W1639305476","https://openalex.org/W1792103217","https://openalex.org/W1847423537","https://openalex.org/W1856507064","https://openalex.org/W1871705262","https://openalex.org/W1900412779","https://openalex.org/W1901266868","https://openalex.org/W1929726975","https://openalex.org/W1934045625","https://openalex.org/W1947728686","https://openalex.org/W1965572530","https://openalex.org/W1969067141","https://openalex.org/W1969126835","https://openalex.org/W1969335064","https://openalex.org/W1969949656","https://openalex.org/W1971212200","https://openalex.org/W1975868314","https://openalex.org/W1976024527","https://openalex.org/W1979162731","https://openalex.org/W1982579475","https://openalex.org/W1985943827","https://openalex.org/W1986313415","https://openalex.org/W1987225815","https://openalex.org/W1988161332","https://openalex.org/W1988596907","https://openalex.org/W1993865637","https://openalex.org/W1994360333","https://openalex.org/W1995099886","https://openalex.org/W1996198089","https://openalex.org/W1997349095","https://openalex.org/W1998817683","https://openalex.org/W2003696547","https://openalex.org/W2004286258","https://openalex.org/W2005611725","https://openalex.org/W2006196087","https://openalex.org/W2008185810","https://openalex.org/W2009191935","https://openalex.org/W2010150514","https://openalex.org/W2012359782","https://openalex.org/W2013592197","https://openalex.org/W2016947394","https://openalex.org/W2019531293","https://openalex.org/W2020928798","https://openalex.org/W2023779084","https://openalex.org/W2024171325","https://openalex.org/W2027016098","https://openalex.org/W2028546326","https://openalex.org/W2029433591","https://openalex.org/W2029673812","https://openalex.org/W2033062224","https://openalex.org/W2035415465","https://openalex.org/W2038908732","https://openalex.org/W2041404167","https://openalex.org/W2045202139","https://openalex.org/W2047439460","https://openalex.org/W2047484077","https://openalex.org/W2050140640","https://openalex.org/W2053449578","https://openalex.org/W2054570356","https://openalex.org/W2055899630","https://openalex.org/W2056980397","https://openalex.org/W2059595900","https://openalex.org/W2060108852","https://openalex.org/W2063547430","https://openalex.org/W2064698638","https://openalex.org/W2066529295","https://openalex.org/W2071417930","https://openalex.org/W2073370301","https://openalex.org/W2076214235","https://openalex.org/W2078449790","https://openalex.org/W2081037744","https://openalex.org/W2083570493","https://openalex.org/W2083695892","https://openalex.org/W2088271809","https://openalex.org/W2093274905","https://openalex.org/W2095763432","https://openalex.org/W2096787381","https://openalex.org/W2097119711","https://openalex.org/W2097382465","https://openalex.org/W2099516661","https://openalex.org/W2103953076","https://openalex.org/W2104037213","https://openalex.org/W2106358932","https://openalex.org/W2107200720","https://openalex.org/W2107551255","https://openalex.org/W2107745473","https://openalex.org/W2107927941","https://openalex.org/W2110322986","https://openalex.org/W2110423232","https://openalex.org/W2110824055","https://openalex.org/W2111238480","https://openalex.org/W2112671267","https://openalex.org/W2112939204","https://openalex.org/W2113154035","https://openalex.org/W2114553826","https://openalex.org/W2115599946","https://openalex.org/W2116314110","https://openalex.org/W2118217749","https://openalex.org/W2118670307","https://openalex.org/W2119172358","https://openalex.org/W2119631914","https://openalex.org/W2121542813","https://openalex.org/W2122962290","https://openalex.org/W2123612386","https://openalex.org/W2123845384","https://openalex.org/W2124632914","https://openalex.org/W2125914539","https://openalex.org/W2126381420","https://openalex.org/W2128593562","https://openalex.org/W2129167380","https://openalex.org/W2129652681","https://openalex.org/W2132069633","https://openalex.org/W2132627996","https://openalex.org/W2133806194","https://openalex.org/W2134792144","https://openalex.org/W2135050683","https://openalex.org/W2142606934","https://openalex.org/W2146889157","https://openalex.org/W2147407897","https://openalex.org/W2148048154","https://openalex.org/W2148885851","https://openalex.org/W2149509741","https://openalex.org/W2149993626","https://openalex.org/W2153158407","https://openalex.org/W2156468933","https://openalex.org/W2156719566","https://openalex.org/W2157240622","https://openalex.org/W2158935814","https://openalex.org/W2161234420","https://openalex.org/W2161488606","https://openalex.org/W2163336450","https://openalex.org/W2165257856","https://openalex.org/W2166667085","https://openalex.org/W2167002207","https://openalex.org/W2169502896","https://openalex.org/W2170564751","https://openalex.org/W2172174733","https://openalex.org/W2195080394","https://openalex.org/W2218192083","https://openalex.org/W2270082617","https://openalex.org/W2281363974","https://openalex.org/W2310350872","https://openalex.org/W2346147088","https://openalex.org/W2583349018","https://openalex.org/W2912601938","https://openalex.org/W2978725006","https://openalex.org/W3004726598","https://openalex.org/W4230077428","https://openalex.org/W4242744113","https://openalex.org/W4251291705","https://openalex.org/W4256025236","https://openalex.org/W6600228218","https://openalex.org/W6600884019","https://openalex.org/W6601060166","https://openalex.org/W6602654064","https://openalex.org/W6602837329","https://openalex.org/W6603735688","https://openalex.org/W6604953292","https://openalex.org/W6605032692","https://openalex.org/W6605053430","https://openalex.org/W6605588685","https://openalex.org/W6606947906","https://openalex.org/W6607057542","https://openalex.org/W6607150254","https://openalex.org/W6607453207","https://openalex.org/W6607470253","https://openalex.org/W6608138927","https://openalex.org/W6628219055","https://openalex.org/W6628236356","https://openalex.org/W6628275343","https://openalex.org/W6629455298","https://openalex.org/W6630994405","https://openalex.org/W6631269574","https://openalex.org/W6632555794","https://openalex.org/W6634463810","https://openalex.org/W6636190696","https://openalex.org/W6636365882","https://openalex.org/W6638179364","https://openalex.org/W6638808162","https://openalex.org/W6638825795","https://openalex.org/W6639246177","https://openalex.org/W6639863922","https://openalex.org/W6640045128","https://openalex.org/W6640146480","https://openalex.org/W6647052414","https://openalex.org/W6663785121","https://openalex.org/W6674404242","https://openalex.org/W6674947391","https://openalex.org/W6677637646","https://openalex.org/W6678796428","https://openalex.org/W6679031692","https://openalex.org/W6679252032","https://openalex.org/W6679804201","https://openalex.org/W6681300249","https://openalex.org/W6681940154","https://openalex.org/W6681973884","https://openalex.org/W6683485282","https://openalex.org/W6683682350","https://openalex.org/W6688437023","https://openalex.org/W6693742233","https://openalex.org/W6695688771","https://openalex.org/W6698271472"],"related_works":["https://openalex.org/W4285245558","https://openalex.org/W1437397736","https://openalex.org/W2048606985","https://openalex.org/W2375056709","https://openalex.org/W2474612884","https://openalex.org/W4301155776","https://openalex.org/W2012359782","https://openalex.org/W2952411620","https://openalex.org/W3156188733","https://openalex.org/W2348132657"],"abstract_inverted_index":{"Data":[0],"deduplication,":[1,80,138],"an":[2],"efficient":[3,57],"approach":[4],"to":[5,19,52,92],"data":[6,29,79,89,98,123,137],"reduction,":[7],"has":[8],"gained":[9],"increasing":[10],"attention":[11],"and":[12,36,75,83,103,115,133,139,151,159],"popularity":[13],"in":[14,63,88],"large-scale":[15,64],"storage":[16,65,165],"systems":[17],"due":[18],"the":[20,31,59,73,85,93,97,106,109,117,130,144,156],"explosive":[21],"growth":[22],"of":[23,78,96,105,108,136,143],"digital":[24],"data.":[25],"It":[26],"eliminates":[27],"redundant":[28],"at":[30],"file":[32],"or":[33],"subfile":[34],"level":[35],"identifies":[37],"duplicate":[38],"content":[39],"by":[40],"its":[41],"cryptographically":[42],"secure":[43],"hash":[44],"signature":[45],"(i.e.,":[46],"collision-resistant":[47],"fingerprint),":[48],"which":[49],"is":[50],"shown":[51],"be":[53],"much":[54],"more":[55],"computationally":[56],"than":[58],"traditional":[60],"compression":[61],"approaches":[62],"systems.":[66,125,166],"In":[67,126],"this":[68],"paper,":[69],"we":[70,128,154],"first":[71],"review":[72],"background":[74],"key":[76,94],"features":[77],"then":[81],"summarize":[82],"classify":[84],"state-of-the-art":[86],"research":[87,150,161],"deduplication":[90,99,112,124,149],"according":[91],"workflow":[95],"process.":[100],"The":[101],"summary":[102],"taxonomy":[104],"state":[107],"art":[110],"on":[111],"help":[113],"identify":[114],"understand":[116],"most":[118],"important":[119],"design":[120],"considerations":[121],"for":[122,148],"addition,":[127],"discuss":[129],"main":[131],"applications":[132],"industry":[134],"trend":[135],"provide":[140],"a":[141],"list":[142],"publicly":[145],"available":[146],"sources":[147],"studies.":[152],"Finally,":[153],"outline":[155],"open":[157],"problems":[158],"future":[160],"directions":[162],"facing":[163],"deduplication-based":[164]},"counts_by_year":[{"year":2026,"cited_by_count":11},{"year":2025,"cited_by_count":44},{"year":2024,"cited_by_count":34},{"year":2023,"cited_by_count":32},{"year":2022,"cited_by_count":36},{"year":2021,"cited_by_count":37},{"year":2020,"cited_by_count":32},{"year":2019,"cited_by_count":31},{"year":2018,"cited_by_count":31},{"year":2017,"cited_by_count":17},{"year":2016,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
