{"id":"https://openalex.org/W2918722987","doi":"https://doi.org/10.1145/3369737","title":"Sketching Volume Capacities in Deduplicated Storage","display_name":"Sketching Volume Capacities in Deduplicated Storage","publication_year":2019,"publication_date":"2019-11-30","ids":{"openalex":"https://openalex.org/W2918722987","doi":"https://doi.org/10.1145/3369737","mag":"2918722987"},"language":"en","primary_location":{"id":"doi:10.1145/3369737","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3369737","pdf_url":null,"source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036936298","display_name":"Danny Harnik","orcid":"https://orcid.org/0009-0000-0614-6543"},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Danny Harnik","raw_affiliation_strings":["IBM Research, Givatayim, Israel","IBM research"],"affiliations":[{"raw_affiliation_string":"IBM Research, Givatayim, Israel","institution_ids":["https://openalex.org/I4210167297"]},{"raw_affiliation_string":"IBM research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001422326","display_name":"Moshik Hershcovitch","orcid":"https://orcid.org/0000-0002-4826-4174"},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Moshik Hershcovitch","raw_affiliation_strings":["IBM Research, Givatayim, Israel","IBM research"],"affiliations":[{"raw_affiliation_string":"IBM Research, Givatayim, Israel","institution_ids":["https://openalex.org/I4210167297"]},{"raw_affiliation_string":"IBM research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081943985","display_name":"Yosef Shatsky","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yosef Shatsky","raw_affiliation_strings":["IBM Systems, Givatayim, Israel","IBM Systems#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM Systems, Givatayim, Israel","institution_ids":[]},{"raw_affiliation_string":"IBM Systems#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057109268","display_name":"Amir Epstein","orcid":null},"institutions":[{"id":"https://openalex.org/I135458274","display_name":"Citigroup","ror":"https://ror.org/032xgdx47","country_code":"US","type":"other","lineage":["https://openalex.org/I135458274"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amir Epstein","raw_affiliation_strings":["Citi Innovation Lab TLV, Israel","Citi Innovation Lab TLV"],"affiliations":[{"raw_affiliation_string":"Citi Innovation Lab TLV, Israel","institution_ids":[]},{"raw_affiliation_string":"Citi Innovation Lab TLV","institution_ids":["https://openalex.org/I135458274"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056976579","display_name":"Ronen I. Kat","orcid":"https://orcid.org/0009-0007-9350-2032"},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Ronen Kat","raw_affiliation_strings":["IBM Research, Givatayim, Israel","IBM research"],"affiliations":[{"raw_affiliation_string":"IBM Research, Givatayim, Israel","institution_ids":["https://openalex.org/I4210167297"]},{"raw_affiliation_string":"IBM research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5036936298"],"corresponding_institution_ids":["https://openalex.org/I4210167297"],"apc_list":null,"apc_paid":null,"fwci":0.3537,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.60807616,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"15","issue":"4","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.7119390964508057},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7053675651550293},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.659503698348999},{"id":"https://openalex.org/keywords/sketch","display_name":"Sketch","score":0.5629019141197205},{"id":"https://openalex.org/keywords/computer-data-storage","display_name":"Computer data storage","score":0.5378109812736511},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.449375182390213},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4047990143299103},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.14278453588485718},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.11430874466896057}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.7119390964508057},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7053675651550293},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.659503698348999},{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.5629019141197205},{"id":"https://openalex.org/C194739806","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Computer data storage","level":2,"score":0.5378109812736511},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.449375182390213},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4047990143299103},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14278453588485718},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.11430874466896057},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3369737","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3369737","pdf_url":null,"source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"},{"id":"mag:2918722987","is_oa":false,"landing_page_url":"https://www.usenix.org/system/files/fast19-harnik.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306418422","display_name":"File and Storage Technologies","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"File and Storage Technologies","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W26020480","https://openalex.org/W1785933978","https://openalex.org/W1931046221","https://openalex.org/W1969126835","https://openalex.org/W1984325555","https://openalex.org/W2025051251","https://openalex.org/W2031252661","https://openalex.org/W2035415465","https://openalex.org/W2055899630","https://openalex.org/W2107745473","https://openalex.org/W2760160636","https://openalex.org/W6648424669"],"related_works":["https://openalex.org/W3004612141","https://openalex.org/W2548944793","https://openalex.org/W2268316875","https://openalex.org/W2731413593","https://openalex.org/W3105976791","https://openalex.org/W2035950297","https://openalex.org/W2291143090","https://openalex.org/W3037111861","https://openalex.org/W2730533293","https://openalex.org/W2218192083","https://openalex.org/W1985472778","https://openalex.org/W1485781593","https://openalex.org/W2066400427","https://openalex.org/W1996353447","https://openalex.org/W2009051782","https://openalex.org/W3091944480","https://openalex.org/W2754803563","https://openalex.org/W2132644419","https://openalex.org/W1815612875","https://openalex.org/W1565405831"],"abstract_inverted_index":{"The":[0],"adoption":[1],"of":[2,46,68,83,92],"deduplication":[3,183],"in":[4,38,95,184],"storage":[5,13,40,54,112,128,151],"systems":[6],"has":[7],"introduced":[8],"significant":[9],"new":[10,33],"challenges":[11],"for":[12,51,107,135,158,171],"management.":[14],"Specifically,":[15],"the":[16,84,93,96,169,176,185],"physical":[17,58,85],"capacities":[18,37],"associated":[19],"with":[20],"volumes":[21,69,94],"are":[22],"no":[23],"longer":[24],"readily":[25],"available.":[26],"In":[27],"this":[28,166],"work,":[29],"we":[30],"introduce":[31],"a":[32,53,64,75,119,148],"approach":[34],"to":[35,71,90,126],"analyzing":[36],"deduplicated":[39],"environments.":[41],"We":[42,130,162],"provide":[43,131],"sketch-based":[44],"estimations":[45,137],"fundamental":[47],"capacity":[48,105,117],"measures":[49],"required":[50],"managing":[52],"system:":[55],"How":[56],"much":[57,82,116],"space":[59,86],"would":[60,118],"be":[61,72,88],"reclaimed":[62],"if":[63],"volume":[65,108,120],"or":[66],"group":[67,121],"were":[70],"removed":[73],"from":[74],"system":[76,97],"(the":[77,98],"reclaimable":[78],"capacity)":[79],"and":[80,153,179],"how":[81,115,165],"should":[87],"attributed":[89,99],"each":[91],"capacity).":[100],"Our":[101,143],"methods":[102],"also":[103,163],"support":[104],"queries":[106],"groups":[109],"across":[110],"multiple":[111],"systems,":[113],"e.g.,":[114],"consume":[122],"after":[123],"being":[124],"migrated":[125],"another":[127],"system?":[129],"analytical":[132],"accuracy":[133],"guarantees":[134],"our":[136],"as":[138,140],"well":[139],"empirical":[141],"evaluations.":[142],"technology":[144],"is":[145],"integrated":[146],"into":[147],"prominent":[149],"all-flash":[150],"array":[152],"exhibits":[154],"high":[155],"performance":[156],"even":[157],"very":[159],"large":[160],"systems.":[161],"demonstrate":[164],"method":[167],"opens":[168],"door":[170],"performing":[172],"placement":[173],"decisions":[174],"at":[175],"data-center":[177],"level":[178],"obtaining":[180],"insights":[181],"on":[182],"field.":[186]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
