{"id":"https://openalex.org/W2607426441","doi":"https://doi.org/10.1109/msst.2016.7897082","title":"Sorted deduplication: How to process thousands of backup streams","display_name":"Sorted deduplication: How to process thousands of backup streams","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2607426441","doi":"https://doi.org/10.1109/msst.2016.7897082","mag":"2607426441"},"language":"en","primary_location":{"id":"doi:10.1109/msst.2016.7897082","is_oa":false,"landing_page_url":"http://doi.org/10.1109/msst.2016.7897082","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 32nd Symposium on Mass Storage Systems and Technologies (MSST)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049694671","display_name":"J\u00fcrgen Kaiser","orcid":null},"institutions":[{"id":"https://openalex.org/I197323543","display_name":"Johannes Gutenberg University Mainz","ror":"https://ror.org/023b0x485","country_code":"DE","type":"education","lineage":["https://openalex.org/I197323543"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Jurgen Kaiser","raw_affiliation_strings":["Johannes Gutenberg-University, Mainz, Germany"],"affiliations":[{"raw_affiliation_string":"Johannes Gutenberg-University, Mainz, Germany","institution_ids":["https://openalex.org/I197323543"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024794786","display_name":"Tim S\u00fc\u00df","orcid":"https://orcid.org/0000-0001-9935-798X"},"institutions":[{"id":"https://openalex.org/I197323543","display_name":"Johannes Gutenberg University Mainz","ror":"https://ror.org/023b0x485","country_code":"DE","type":"education","lineage":["https://openalex.org/I197323543"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tim Suss","raw_affiliation_strings":["Johannes Gutenberg-University, Mainz, Germany"],"affiliations":[{"raw_affiliation_string":"Johannes Gutenberg-University, Mainz, Germany","institution_ids":["https://openalex.org/I197323543"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066777443","display_name":"Lars Nagel","orcid":"https://orcid.org/0000-0002-1444-9541"},"institutions":[{"id":"https://openalex.org/I197323543","display_name":"Johannes Gutenberg University Mainz","ror":"https://ror.org/023b0x485","country_code":"DE","type":"education","lineage":["https://openalex.org/I197323543"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Lars Nagel","raw_affiliation_strings":["Johannes Gutenberg-University, Mainz, Germany"],"affiliations":[{"raw_affiliation_string":"Johannes Gutenberg-University, Mainz, Germany","institution_ids":["https://openalex.org/I197323543"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011466225","display_name":"Andr\u00e9 Brinkmann","orcid":"https://orcid.org/0000-0003-3083-2775"},"institutions":[{"id":"https://openalex.org/I197323543","display_name":"Johannes Gutenberg University Mainz","ror":"https://ror.org/023b0x485","country_code":"DE","type":"education","lineage":["https://openalex.org/I197323543"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Andre Brinkmann","raw_affiliation_strings":["Johannes Gutenberg-University, Mainz, Germany"],"affiliations":[{"raw_affiliation_string":"Johannes Gutenberg-University, Mainz, Germany","institution_ids":["https://openalex.org/I197323543"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5049694671"],"corresponding_institution_ids":["https://openalex.org/I197323543"],"apc_list":null,"apc_paid":null,"fwci":1.4477,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.85301993,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"1518","issue":null,"first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.9102658629417419},{"id":"https://openalex.org/keywords/backup","display_name":"Backup","score":0.8594561815261841},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7394880056381226},{"id":"https://openalex.org/keywords/streams","display_name":"STREAMS","score":0.65305495262146},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.537757396697998},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.4388864040374756},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.314214289188385}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.9102658629417419},{"id":"https://openalex.org/C2780945871","wikidata":"https://www.wikidata.org/wiki/Q194274","display_name":"Backup","level":2,"score":0.8594561815261841},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7394880056381226},{"id":"https://openalex.org/C42090638","wikidata":"https://www.wikidata.org/wiki/Q4048907","display_name":"STREAMS","level":2,"score":0.65305495262146},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.537757396697998},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.4388864040374756},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.314214289188385}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/msst.2016.7897082","is_oa":false,"landing_page_url":"http://doi.org/10.1109/msst.2016.7897082","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 32nd Symposium on Mass Storage Systems and Technologies (MSST)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W38366299","https://openalex.org/W69510097","https://openalex.org/W120798314","https://openalex.org/W174289249","https://openalex.org/W182307532","https://openalex.org/W200233886","https://openalex.org/W1445823765","https://openalex.org/W1454188553","https://openalex.org/W1576397915","https://openalex.org/W1587463421","https://openalex.org/W1969126835","https://openalex.org/W1969949656","https://openalex.org/W1997349095","https://openalex.org/W2016947394","https://openalex.org/W2068739275","https://openalex.org/W2073053905","https://openalex.org/W2110322986","https://openalex.org/W2111238480","https://openalex.org/W2126381420","https://openalex.org/W2132069633","https://openalex.org/W2133806194","https://openalex.org/W2149993626","https://openalex.org/W2158935814","https://openalex.org/W2281363974","https://openalex.org/W2295428206","https://openalex.org/W2408052048","https://openalex.org/W4230077428","https://openalex.org/W4231916799","https://openalex.org/W6601562713","https://openalex.org/W6602837329","https://openalex.org/W6604953292","https://openalex.org/W6607057542","https://openalex.org/W6607453207","https://openalex.org/W6608138927","https://openalex.org/W6628236356","https://openalex.org/W6628419871","https://openalex.org/W6634463810","https://openalex.org/W6668892049","https://openalex.org/W6679663036","https://openalex.org/W6680238116","https://openalex.org/W6681940154","https://openalex.org/W6683485282","https://openalex.org/W6695688771","https://openalex.org/W6713527140"],"related_works":["https://openalex.org/W4285245558","https://openalex.org/W1437397736","https://openalex.org/W1505619784","https://openalex.org/W2568159272","https://openalex.org/W4238047024","https://openalex.org/W2969586989","https://openalex.org/W755035568","https://openalex.org/W2144577446","https://openalex.org/W4237647408","https://openalex.org/W2498664647"],"abstract_inverted_index":{"The":[0,92,110],"requirements":[1],"of":[2,20,37,80],"deduplication":[3,12,74,150],"systems":[4,13],"have":[5],"changed":[6],"in":[7],"the":[8,24,65,84,88,96,120,127,130,135,146,161,170],"last":[9],"years.":[10],"Early":[11],"had":[14],"to":[15,18,32,35,51,113,140,155],"process":[16,33],"dozens":[17],"hundreds":[19,34],"backup":[21,81,121],"streams":[22,63,82],"at":[23,83,169],"same":[25,85,89,171],"time":[26,86],"while":[27,123,165],"today":[28],"they":[29],"are":[30],"able":[31],"thousands":[36,79],"them.":[38],"Traditional":[39],"approaches":[40],"rely":[41],"on":[42,87,119,129],"stream-locality,":[43],"which":[44,48],"supports":[45],"parallelism,":[46],"but":[47],"easily":[49],"leads":[50,112],"many":[52],"non-contiguous":[53],"disk":[54,116],"accesses,":[55],"as":[56],"each":[57],"stream":[58],"competes":[59],"with":[60],"all":[61],"other":[62],"for":[64,77],"available":[66],"resources.":[67],"This":[68],"paper":[69],"presents":[70],"a":[71,104],"new":[72,105,136],"exact":[73,147],"approach":[75,94,137],"designed":[76],"processing":[78],"fingerprint":[90],"index.":[91],"underlying":[93],"destroys":[95],"traditionally":[97],"exploited":[98],"temporal":[99],"chunk":[100],"locality":[101],"and":[102,153],"creates":[103],"one":[106],"by":[107],"sorting":[108,111],"fingerprints.":[109],"perfectly":[114],"sequential":[115],"access":[117],"patterns":[118],"servers,":[122],"only":[124],"slightly":[125],"increasing":[126],"load":[128],"clients.":[131],"In":[132],"our":[133],"experiments,":[134],"generates":[138],"up":[139,154],"113":[141],"times":[142,157],"less":[143,158,167],"I/Os":[144,159],"than":[145,160],"Data":[148],"Domain":[149],"file":[151],"system":[152],"12":[156],"approximate":[162],"Sparse":[163],"Indexing,":[164],"consuming":[166],"memory":[168],"time.":[172]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
