{"id":"https://openalex.org/W2156468933","doi":"https://doi.org/10.1145/2611778","title":"A Survey and Classification of Storage Deduplication Systems","display_name":"A Survey and Classification of Storage Deduplication Systems","publication_year":2014,"publication_date":"2014-06-01","ids":{"openalex":"https://openalex.org/W2156468933","doi":"https://doi.org/10.1145/2611778","mag":"2156468933"},"language":"en","primary_location":{"id":"doi:10.1145/2611778","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2611778","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"type":"review","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://repositorio.inesctec.pt/handle/123456789/4154","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054409160","display_name":"Jo\u00e3o Paulo","orcid":"https://orcid.org/0000-0001-9752-2822"},"institutions":[{"id":"https://openalex.org/I4210166615","display_name":"INESC TEC","ror":"https://ror.org/05fa8ka61","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I4210125590","https://openalex.org/I4210166615"]},{"id":"https://openalex.org/I99682543","display_name":"University of Minho","ror":"https://ror.org/037wpkx04","country_code":"PT","type":"education","lineage":["https://openalex.org/I99682543"]}],"countries":["PT"],"is_corresponding":true,"raw_author_name":"Jo\u00e3o Paulo","raw_affiliation_strings":["High-Assurance Software Lab (HASLab), INESC TEC &amp; University of Minho, Braga, Portugal"],"affiliations":[{"raw_affiliation_string":"High-Assurance Software Lab (HASLab), INESC TEC &amp; University of Minho, Braga, Portugal","institution_ids":["https://openalex.org/I99682543","https://openalex.org/I4210166615"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065441606","display_name":"Jos\u00e9 Pereira","orcid":"https://orcid.org/0000-0002-3341-9217"},"institutions":[{"id":"https://openalex.org/I99682543","display_name":"University of Minho","ror":"https://ror.org/037wpkx04","country_code":"PT","type":"education","lineage":["https://openalex.org/I99682543"]},{"id":"https://openalex.org/I4210166615","display_name":"INESC TEC","ror":"https://ror.org/05fa8ka61","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I4210125590","https://openalex.org/I4210166615"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Jos\u00e9 Pereira","raw_affiliation_strings":["High-Assurance Software Lab (HASLab), INESC TEC &amp; University of Minho, Braga, Portugal"],"affiliations":[{"raw_affiliation_string":"High-Assurance Software Lab (HASLab), INESC TEC &amp; University of Minho, Braga, Portugal","institution_ids":["https://openalex.org/I99682543","https://openalex.org/I4210166615"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5054409160"],"corresponding_institution_ids":["https://openalex.org/I4210166615","https://openalex.org/I99682543"],"apc_list":null,"apc_paid":null,"fwci":37.9154,"has_fulltext":false,"cited_by_count":137,"citation_normalized_percentile":{"value":0.997189,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"47","issue":"1","first_page":"1","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.9112287163734436},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8671663999557495},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5888168811798096},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.5877323746681213},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5541321039199829},{"id":"https://openalex.org/keywords/computer-data-storage","display_name":"Computer data storage","score":0.5247766375541687},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5187144875526428},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.49196478724479675},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4619695544242859},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.4514559209346771},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.43700164556503296},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.37783634662628174},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3723721504211426},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.29040804505348206},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.12361666560173035},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.0942225456237793}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.9112287163734436},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8671663999557495},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5888168811798096},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.5877323746681213},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5541321039199829},{"id":"https://openalex.org/C194739806","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Computer data storage","level":2,"score":0.5247766375541687},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5187144875526428},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.49196478724479675},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4619695544242859},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.4514559209346771},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.43700164556503296},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37783634662628174},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3723721504211426},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.29040804505348206},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.12361666560173035},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0942225456237793},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/2611778","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2611778","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},{"id":"pmh:oai:repositorio.inesctec.pt:123456789/4154","is_oa":true,"landing_page_url":"http://repositorio.inesctec.pt/handle/123456789/4154","pdf_url":null,"source":{"id":"https://openalex.org/S4306402433","display_name":"Portuguese National Funding Agency for Science, Research and Technology (RCAAP Project by FCT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal article"},{"id":"pmh:oai:repositorium.sdum.uminho.pt:1822/34973","is_oa":true,"landing_page_url":"http://hdl.handle.net/1822/34973","pdf_url":null,"source":{"id":"https://openalex.org/S4306401240","display_name":"Reposit\u00f3riUM (Universidade do Minho)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I99682543","host_organization_name":"University of Minho","host_organization_lineage":["https://openalex.org/I99682543"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal article"},{"id":"pmh:oai:repositorium.uminho.pt:1822/34973","is_oa":true,"landing_page_url":"https://hdl.handle.net/1822/34973","pdf_url":null,"source":null,"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:repositorio.inesctec.pt:123456789/4154","is_oa":true,"landing_page_url":"http://repositorio.inesctec.pt/handle/123456789/4154","pdf_url":null,"source":{"id":"https://openalex.org/S4306402433","display_name":"Portuguese National Funding Agency for Science, Research and Technology (RCAAP Project by FCT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":75,"referenced_works":["https://openalex.org/W6847055","https://openalex.org/W26020480","https://openalex.org/W69510097","https://openalex.org/W89823361","https://openalex.org/W125480971","https://openalex.org/W126194223","https://openalex.org/W162607394","https://openalex.org/W174289249","https://openalex.org/W178328500","https://openalex.org/W182307532","https://openalex.org/W193648907","https://openalex.org/W200233886","https://openalex.org/W1474119323","https://openalex.org/W1490390347","https://openalex.org/W1508926177","https://openalex.org/W1521407587","https://openalex.org/W1541467180","https://openalex.org/W1542686980","https://openalex.org/W1568472162","https://openalex.org/W1576397915","https://openalex.org/W1589276590","https://openalex.org/W1601735929","https://openalex.org/W1609518033","https://openalex.org/W1614703486","https://openalex.org/W1838765419","https://openalex.org/W1856507064","https://openalex.org/W1929726975","https://openalex.org/W1965572530","https://openalex.org/W1969126835","https://openalex.org/W1969949656","https://openalex.org/W1971212200","https://openalex.org/W1998817683","https://openalex.org/W2003696547","https://openalex.org/W2004286258","https://openalex.org/W2006816934","https://openalex.org/W2009191935","https://openalex.org/W2013592197","https://openalex.org/W2028546326","https://openalex.org/W2029673812","https://openalex.org/W2033062224","https://openalex.org/W2038908732","https://openalex.org/W2047484077","https://openalex.org/W2050140640","https://openalex.org/W2051751274","https://openalex.org/W2055899630","https://openalex.org/W2056980397","https://openalex.org/W2058189720","https://openalex.org/W2058716449","https://openalex.org/W2061194392","https://openalex.org/W2071768826","https://openalex.org/W2077850713","https://openalex.org/W2096670209","https://openalex.org/W2097964935","https://openalex.org/W2110824055","https://openalex.org/W2121542813","https://openalex.org/W2128593562","https://openalex.org/W2129610259","https://openalex.org/W2132069633","https://openalex.org/W2132627996","https://openalex.org/W2133806194","https://openalex.org/W2134792144","https://openalex.org/W2135348622","https://openalex.org/W2143882267","https://openalex.org/W2146532238","https://openalex.org/W2147755199","https://openalex.org/W2149509741","https://openalex.org/W2149993626","https://openalex.org/W2152921908","https://openalex.org/W2153158407","https://openalex.org/W2156719566","https://openalex.org/W2157240622","https://openalex.org/W2169486917","https://openalex.org/W2281363974","https://openalex.org/W2583401777","https://openalex.org/W2585172182"],"related_works":["https://openalex.org/W3144870715","https://openalex.org/W3142319788","https://openalex.org/W2587188779","https://openalex.org/W2179326652","https://openalex.org/W3008013744","https://openalex.org/W2884042376","https://openalex.org/W2615141153","https://openalex.org/W2077135008","https://openalex.org/W223870977","https://openalex.org/W2739791894"],"abstract_inverted_index":{"The":[0,86],"automatic":[1],"elimination":[2],"of":[3,81,89,95,126,136],"duplicate":[4],"data":[5],"in":[6,149],"a":[7,93,129],"storage":[8,23,32,59,151],"system,":[9],"commonly":[10],"known":[11],"as":[12,17],"deduplication,":[13],"is":[14,55,74,92],"increasingly":[15],"accepted":[16],"an":[18],"effective":[19],"technique":[20],"to":[21,30,45,53,67,99,104],"reduce":[22],"costs.":[24],"Thus,":[25],"it":[26],"has":[27],"been":[28,141],"applied":[29],"different":[31,68,121],"types,":[33,60],"including":[34],"archives":[35],"and":[36,43,65,70,84,113,118,143,157,163],"backups,":[37],"primary":[38],"storage,":[39],"within":[40],"solid-state":[41],"drives,":[42],"even":[44],"random":[46],"access":[47],"memory.":[48],"Although":[49],"the":[50,79,120],"general":[51],"approach":[52],"deduplication":[54,96],"shared":[56],"by":[57],"all":[58],"each":[61,125,150],"poses":[62],"specific":[63],"challenges":[64,148,156],"leads":[66],"trade-offs":[69],"solutions.":[71],"This":[72,115],"diversity":[73],"often":[75],"misunderstood,":[76],"thus":[77],"underestimating":[78],"relevance":[80],"new":[82],"research":[83,155],"development.":[85],"first":[87],"contribution":[88],"this":[90],"article":[91],"classification":[94,116],"systems":[97],"according":[98],"six":[100],"criteria":[101],"that":[102],"correspond":[103],"key":[105],"design":[106,138,159],"decisions:":[107],"granularity,":[108],"locality,":[109],"timing,":[110],"indexing,":[111],"technique,":[112],"scope.":[114],"identifies":[117],"describes":[119],"approaches":[122],"used":[123],"for":[124,147],"them.":[127],"As":[128],"second":[130],"contribution,":[131],"we":[132],"describe":[133],"which":[134],"combinations":[135],"these":[137],"decisions":[139],"have":[140],"proposed":[142],"found":[144],"more":[145],"useful":[146],"type.":[152],"Finally,":[153],"outstanding":[154],"unexplored":[158],"points":[160],"are":[161],"identified":[162],"discussed.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":14},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":11},{"year":2017,"cited_by_count":14},{"year":2016,"cited_by_count":20},{"year":2015,"cited_by_count":11},{"year":2014,"cited_by_count":1}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
