{"id":"https://openalex.org/W2606291150","doi":"https://doi.org/10.1109/msst.2016.7897080","title":"A long-term user-centric analysis of deduplication patterns","display_name":"A long-term user-centric analysis of deduplication patterns","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2606291150","doi":"https://doi.org/10.1109/msst.2016.7897080","mag":"2606291150"},"language":"en","primary_location":{"id":"doi:10.1109/msst.2016.7897080","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msst.2016.7897080","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 32nd Symposium on Mass Storage Systems and Technologies (MSST)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059220776","display_name":"Zhen Sun","orcid":"https://orcid.org/0000-0001-8954-3287"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]},{"id":"https://openalex.org/I59553526","display_name":"Stony Brook University","ror":"https://ror.org/05qghxh33","country_code":"US","type":"education","lineage":["https://openalex.org/I59553526"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Zhen Sun","raw_affiliation_strings":["College of Computer, National University of Defense Technology, China","Stony Brook University"],"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Stony Brook University","institution_ids":["https://openalex.org/I59553526"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056117639","display_name":"Geoff Kuenning","orcid":"https://orcid.org/0000-0002-3882-2072"},"institutions":[{"id":"https://openalex.org/I133543626","display_name":"Harvey Mudd College","ror":"https://ror.org/025ecfn45","country_code":"US","type":"education","lineage":["https://openalex.org/I133543626"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Geoff Kuenning","raw_affiliation_strings":["Harvey Mudd College"],"affiliations":[{"raw_affiliation_string":"Harvey Mudd College","institution_ids":["https://openalex.org/I133543626"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037667483","display_name":"Sonam Mandal","orcid":"https://orcid.org/0000-0002-2108-304X"},"institutions":[{"id":"https://openalex.org/I59553526","display_name":"Stony Brook University","ror":"https://ror.org/05qghxh33","country_code":"US","type":"education","lineage":["https://openalex.org/I59553526"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sonam Mandal","raw_affiliation_strings":["Stony Brook University"],"affiliations":[{"raw_affiliation_string":"Stony Brook University","institution_ids":["https://openalex.org/I59553526"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091766663","display_name":"Philip Shilane","orcid":"https://orcid.org/0000-0003-1235-0502"},"institutions":[{"id":"https://openalex.org/I4210092318","display_name":"Education Management Corporation (United States)","ror":"https://ror.org/00df8wr13","country_code":"US","type":"company","lineage":["https://openalex.org/I4210092318"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Philip Shilane","raw_affiliation_strings":["EMC Corporation"],"affiliations":[{"raw_affiliation_string":"EMC Corporation","institution_ids":["https://openalex.org/I4210092318"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045054131","display_name":"Vasily Tarasov","orcid":"https://orcid.org/0000-0003-1424-9977"},"institutions":[{"id":"https://openalex.org/I4210085935","display_name":"IBM Research - Almaden","ror":"https://ror.org/005w8dd04","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210085935","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vasily Tarasov","raw_affiliation_strings":["IBM Research\u2014Almaden"],"affiliations":[{"raw_affiliation_string":"IBM Research\u2014Almaden","institution_ids":["https://openalex.org/I4210085935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023506057","display_name":"Nong Xiao","orcid":"https://orcid.org/0000-0002-2166-977X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nong Xiao","raw_affiliation_strings":["College of Computer, National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011685551","display_name":"kand Erez Zadok","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"kand Erez Zadok","raw_affiliation_strings":["School of Data and Computer Science, Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School of Data and Computer Science, Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5059220776"],"corresponding_institution_ids":["https://openalex.org/I170215575","https://openalex.org/I59553526"],"apc_list":null,"apc_paid":null,"fwci":4.8578,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.95437982,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10742","display_name":"Peer-to-Peer Network Technologies","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.9652684926986694},{"id":"https://openalex.org/keywords/backup","display_name":"Backup","score":0.9461885690689087},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8111183643341064},{"id":"https://openalex.org/keywords/snapshot","display_name":"Snapshot (computer storage)","score":0.6227385401725769},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.4864747226238251},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.43018779158592224}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.9652684926986694},{"id":"https://openalex.org/C2780945871","wikidata":"https://www.wikidata.org/wiki/Q194274","display_name":"Backup","level":2,"score":0.9461885690689087},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8111183643341064},{"id":"https://openalex.org/C55282118","wikidata":"https://www.wikidata.org/wiki/Q252683","display_name":"Snapshot (computer storage)","level":2,"score":0.6227385401725769},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.4864747226238251},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.43018779158592224},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/msst.2016.7897080","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msst.2016.7897080","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 32nd Symposium on Mass Storage Systems and Technologies (MSST)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335773","display_name":"National High-tech Research and Development Program","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W5335935","https://openalex.org/W22807665","https://openalex.org/W26020480","https://openalex.org/W69510097","https://openalex.org/W120798314","https://openalex.org/W182307532","https://openalex.org/W200233886","https://openalex.org/W1445823765","https://openalex.org/W1474119323","https://openalex.org/W1521407587","https://openalex.org/W1545493325","https://openalex.org/W1599186236","https://openalex.org/W1626210897","https://openalex.org/W1856507064","https://openalex.org/W1963568712","https://openalex.org/W1969126835","https://openalex.org/W1971212200","https://openalex.org/W2016947394","https://openalex.org/W2036874012","https://openalex.org/W2055899630","https://openalex.org/W2088271809","https://openalex.org/W2110322986","https://openalex.org/W2138061848","https://openalex.org/W2147407897","https://openalex.org/W2153158407","https://openalex.org/W2156865561","https://openalex.org/W2170564751","https://openalex.org/W2281363974","https://openalex.org/W2293794372","https://openalex.org/W2408052048","https://openalex.org/W2570363240","https://openalex.org/W2585172182","https://openalex.org/W2642383336","https://openalex.org/W4230077428","https://openalex.org/W6600228218","https://openalex.org/W6600884019","https://openalex.org/W6602837329","https://openalex.org/W6604953292","https://openalex.org/W6607453207","https://openalex.org/W6608138927","https://openalex.org/W6628236356","https://openalex.org/W6631269574","https://openalex.org/W6635789628","https://openalex.org/W6638808162","https://openalex.org/W6676700842","https://openalex.org/W6684883750","https://openalex.org/W6695688771","https://openalex.org/W6713527140"],"related_works":["https://openalex.org/W4285245558","https://openalex.org/W1437397736","https://openalex.org/W2048606985","https://openalex.org/W2375056709","https://openalex.org/W2474612884","https://openalex.org/W4301155776","https://openalex.org/W2012359782","https://openalex.org/W2952411620","https://openalex.org/W3156188733","https://openalex.org/W2348132657"],"abstract_inverted_index":{"Deduplication":[0],"has":[1],"become":[2],"essential":[3,78],"in":[4,94,103],"disk-based":[5],"backup":[6,16,41],"systems,":[7],"but":[8],"there":[9],"have":[10],"been":[11],"few":[12],"long-term":[13],"studies":[14,20],"of":[15,23,38,53,77,96],"workloads.":[17],"Most":[18],"past":[19],"either":[21],"were":[22],"a":[24,31,40,56,75,117],"small":[25],"static":[26],"snapshot":[27],"or":[28],"covered":[29],"only":[30],"short":[32],"period":[33],"that":[34,110],"was":[35,84],"not":[36],"representative":[37],"how":[39],"system":[42],"evolves":[43],"over":[44,64],"time.":[45],"For":[46],"this":[47,126],"paper,":[48],"we":[49,99],"collected":[50],"21":[51],"months":[52],"data":[54,72,109],"from":[55],"shared":[57],"user":[58,86],"file":[59],"system;":[60],"33":[61],"users":[62,112],"and":[63,92,128],"4,000":[65],"snapshots":[66],"are":[67],"covered.":[68],"We":[69,124],"analyzed":[70],"the":[71,108],"set":[73],"for":[74,131],"variety":[76],"characteristics.":[79],"However,":[80],"our":[81,97],"primary":[82],"focus":[83],"individual":[85],"data.":[87],"Despite":[88],"apparently":[89],"similar":[90],"roles":[91],"behavior":[93,127],"all":[95],"users,":[98],"found":[100],"significant":[101],"differences":[102],"their":[104],"deduplication":[105,120,133],"ratios.":[106],"Moreover,":[107],"some":[111],"share":[113],"with":[114],"others":[115],"had":[116],"much":[118],"higher":[119],"ratio":[121],"than":[122],"average.":[123],"analyze":[125],"make":[129],"recommendations":[130],"future":[132],"systems":[134],"design.":[135]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":2}],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-10-10T00:00:00"}
