{"id":"https://openalex.org/W2055899630","doi":"https://doi.org/10.1145/2391229.2391246","title":"Probabilistic deduplication for cluster-based storage systems","display_name":"Probabilistic deduplication for cluster-based storage systems","publication_year":2012,"publication_date":"2012-10-14","ids":{"openalex":"https://openalex.org/W2055899630","doi":"https://doi.org/10.1145/2391229.2391246","mag":"2055899630"},"language":"en","primary_location":{"id":"doi:10.1145/2391229.2391246","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2391229.2391246","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090971823","display_name":"Davide Frey","orcid":"https://orcid.org/0000-0002-6730-5744"},"institutions":[{"id":"https://openalex.org/I4210133778","display_name":"Inria Rennes - Bretagne Atlantique Research Centre","ror":"https://ror.org/04040yw90","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210133778"]},{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Davide Frey","raw_affiliation_strings":["INRIA, Rennes, France","Inria-Rennes, France#TAB#"],"affiliations":[{"raw_affiliation_string":"INRIA, Rennes, France","institution_ids":["https://openalex.org/I4210133778","https://openalex.org/I1326498283"]},{"raw_affiliation_string":"Inria-Rennes, France#TAB#","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031849486","display_name":"Anne-Marie Kermarrec","orcid":"https://orcid.org/0000-0001-8187-724X"},"institutions":[{"id":"https://openalex.org/I4210133778","display_name":"Inria Rennes - Bretagne Atlantique Research Centre","ror":"https://ror.org/04040yw90","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210133778"]},{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Anne-Marie Kermarrec","raw_affiliation_strings":["INRIA, Rennes, France","Inria-Rennes, France#TAB#"],"affiliations":[{"raw_affiliation_string":"INRIA, Rennes, France","institution_ids":["https://openalex.org/I4210133778","https://openalex.org/I1326498283"]},{"raw_affiliation_string":"Inria-Rennes, France#TAB#","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015542078","display_name":"Konstantinos Kloudas","orcid":null},"institutions":[{"id":"https://openalex.org/I4210133778","display_name":"Inria Rennes - Bretagne Atlantique Research Centre","ror":"https://ror.org/04040yw90","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210133778"]},{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Konstantinos Kloudas","raw_affiliation_strings":["INRIA, Rennes, France","Inria-Rennes, France#TAB#"],"affiliations":[{"raw_affiliation_string":"INRIA, Rennes, France","institution_ids":["https://openalex.org/I4210133778","https://openalex.org/I1326498283"]},{"raw_affiliation_string":"Inria-Rennes, France#TAB#","institution_ids":["https://openalex.org/I1326498283"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5090971823"],"corresponding_institution_ids":["https://openalex.org/I1326498283","https://openalex.org/I4210133778"],"apc_list":null,"apc_paid":null,"fwci":3.3443,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.9263357,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.952455997467041},{"id":"https://openalex.org/keywords/backup","display_name":"Backup","score":0.890993595123291},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8327723145484924},{"id":"https://openalex.org/keywords/stateful-firewall","display_name":"Stateful firewall","score":0.6995996236801147},{"id":"https://openalex.org/keywords/stateless-protocol","display_name":"Stateless protocol","score":0.5634441375732422},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5595666766166687},{"id":"https://openalex.org/keywords/disaster-recovery","display_name":"Disaster recovery","score":0.5064860582351685},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.48743537068367004},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.45058131217956543},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.32265666127204895},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.21403643488883972}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.952455997467041},{"id":"https://openalex.org/C2780945871","wikidata":"https://www.wikidata.org/wiki/Q194274","display_name":"Backup","level":2,"score":0.890993595123291},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8327723145484924},{"id":"https://openalex.org/C22927095","wikidata":"https://www.wikidata.org/wiki/Q1784206","display_name":"Stateful firewall","level":3,"score":0.6995996236801147},{"id":"https://openalex.org/C103613024","wikidata":"https://www.wikidata.org/wiki/Q230924","display_name":"Stateless protocol","level":3,"score":0.5634441375732422},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5595666766166687},{"id":"https://openalex.org/C129230348","wikidata":"https://www.wikidata.org/wiki/Q1140205","display_name":"Disaster recovery","level":2,"score":0.5064860582351685},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.48743537068367004},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.45058131217956543},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.32265666127204895},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.21403643488883972},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.0},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2391229.2391246","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2391229.2391246","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-00728215v1","is_oa":false,"landing_page_url":"https://inria.hal.science/hal-00728215","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM Symposium on Cloud Computing, Oct 2012, San Jose, CA, United States","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7186578063","display_name":null,"funder_award_id":"204742","funder_id":"https://openalex.org/F4320334678","funder_display_name":"European Research Council"}],"funders":[{"id":"https://openalex.org/F4320334678","display_name":"European Research Council","ror":"https://ror.org/0472cxd90"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W5335935","https://openalex.org/W22807665","https://openalex.org/W69510097","https://openalex.org/W182307532","https://openalex.org/W1490390347","https://openalex.org/W1599186236","https://openalex.org/W1847423537","https://openalex.org/W1929726975","https://openalex.org/W1969126835","https://openalex.org/W2007842132","https://openalex.org/W2025051251","https://openalex.org/W2044983364","https://openalex.org/W2056980397","https://openalex.org/W2081037744","https://openalex.org/W2099480861","https://openalex.org/W2107436698","https://openalex.org/W2121133177","https://openalex.org/W2132627996","https://openalex.org/W2149993626","https://openalex.org/W2585172182","https://openalex.org/W6601060166","https://openalex.org/W6640146480"],"related_works":["https://openalex.org/W2035312053","https://openalex.org/W2908539414","https://openalex.org/W4285245558","https://openalex.org/W1437397736","https://openalex.org/W2048606985","https://openalex.org/W112987992","https://openalex.org/W2375056709","https://openalex.org/W4301155776","https://openalex.org/W2012359782","https://openalex.org/W2952411620"],"abstract_inverted_index":{"The":[0],"need":[1],"to":[2,10,22,44],"backup":[3,29],"huge":[4],"quantities":[5],"of":[6,13,16,26,50,55,96],"data":[7,74],"has":[8],"led":[9],"the":[11,24,48,68,90,94],"development":[12],"a":[14,32],"number":[15],"distributed":[17],"deduplication":[18],"techniques":[19],"that":[20],"aim":[21],"reproduce":[23],"operation":[25],"centralized,":[27],"single-node":[28],"systems":[30],"in":[31,53],"cluster-based":[33],"environment.":[34],"At":[35,67],"one":[36],"extreme,":[37,70],"stateful":[38],"solutions":[39],"rely":[40],"on":[41,78],"indexing":[42],"mechanisms":[43],"maximize":[45],"deduplication.":[46,97],"However":[47],"cost":[49,91],"these":[51],"strategies":[52,72],"terms":[54],"computation":[56],"and":[57],"memory":[58],"resources":[59],"makes":[60],"them":[61],"unsuitable":[62],"for":[63],"large-scale":[64],"storage":[65],"systems.":[66],"other":[69],"stateless":[71],"store":[73],"blocks":[75],"based":[76],"only":[77],"their":[79],"content,":[80],"without":[81],"taking":[82],"into":[83],"account":[84],"previous":[85],"placement":[86],"decisions,":[87],"thus":[88],"reducing":[89],"but":[92],"also":[93],"effectiveness":[95]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
