{"id":"https://openalex.org/W2605532879","doi":"https://doi.org/10.1109/msst.2016.7897081","title":"Lazy exact deduplication","display_name":"Lazy exact deduplication","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2605532879","doi":"https://doi.org/10.1109/msst.2016.7897081","mag":"2605532879"},"language":"en","primary_location":{"id":"doi:10.1109/msst.2016.7897081","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msst.2016.7897081","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 32nd Symposium on Mass Storage Systems and Technologies (MSST)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055082533","display_name":"Jingwei Ma","orcid":"https://orcid.org/0000-0001-7220-5927"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jingwei Ma","raw_affiliation_strings":["College of Computer and Control Engineering, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Computer and Control Engineering, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003936493","display_name":"Rebecca J. Stones","orcid":"https://orcid.org/0000-0002-9991-2995"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rebecca J. Stones","raw_affiliation_strings":["College of Computer and Control Engineering, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Computer and Control Engineering, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083713825","display_name":"Yuxiang Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxiang Ma","raw_affiliation_strings":["College of Computer and Control Engineering, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Computer and Control Engineering, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061828901","display_name":"Jingui Wang","orcid":"https://orcid.org/0000-0001-9947-9821"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingui Wang","raw_affiliation_strings":["College of Computer and Control Engineering, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Computer and Control Engineering, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090585701","display_name":"Junjie Ren","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junjie Ren","raw_affiliation_strings":["College of Computer and Control Engineering, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Computer and Control Engineering, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100367433","display_name":"Gang Wang","orcid":"https://orcid.org/0000-0003-0387-2501"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Wang","raw_affiliation_strings":["College of Computer and Control Engineering, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Computer and Control Engineering, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100390120","display_name":"Xiaoguang Liu","orcid":"https://orcid.org/0000-0002-9010-3278"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoguang Liu","raw_affiliation_strings":["College of Computer and Control Engineering, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Computer and Control Engineering, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5055082533"],"corresponding_institution_ids":["https://openalex.org/I205237279"],"apc_list":null,"apc_paid":null,"fwci":1.4477,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.85292574,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"94","issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9678000211715698,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.9390000104904175,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.8100261688232422},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6819793581962585},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.23011910915374756}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.8100261688232422},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6819793581962585},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.23011910915374756}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/msst.2016.7897081","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msst.2016.7897081","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 32nd Symposium on Mass Storage Systems and Technologies (MSST)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W69510097","https://openalex.org/W174289249","https://openalex.org/W178328500","https://openalex.org/W182307532","https://openalex.org/W200233886","https://openalex.org/W1418704970","https://openalex.org/W1490390347","https://openalex.org/W1521407587","https://openalex.org/W1609518033","https://openalex.org/W1626210897","https://openalex.org/W1794685076","https://openalex.org/W1856507064","https://openalex.org/W1871705262","https://openalex.org/W1969126835","https://openalex.org/W1971212200","https://openalex.org/W1979162731","https://openalex.org/W2028546326","https://openalex.org/W2041920271","https://openalex.org/W2073370301","https://openalex.org/W2080891686","https://openalex.org/W2107551255","https://openalex.org/W2110322986","https://openalex.org/W2123845384","https://openalex.org/W2124632914","https://openalex.org/W2129610259","https://openalex.org/W2149509741","https://openalex.org/W2156468933","https://openalex.org/W2158935814","https://openalex.org/W2167538128","https://openalex.org/W2169502896","https://openalex.org/W2281363974","https://openalex.org/W2293794372","https://openalex.org/W2617867460","https://openalex.org/W3150003982","https://openalex.org/W6602837329","https://openalex.org/W6607057542","https://openalex.org/W6607150254","https://openalex.org/W6607453207","https://openalex.org/W6608138927","https://openalex.org/W6628219055","https://openalex.org/W6629455298","https://openalex.org/W6631269574","https://openalex.org/W6636190696","https://openalex.org/W6638320388","https://openalex.org/W6638808162","https://openalex.org/W6639246177","https://openalex.org/W6642541085","https://openalex.org/W6683485282","https://openalex.org/W6695688771","https://openalex.org/W6738653283","https://openalex.org/W6793350840"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W3144870715","https://openalex.org/W3142319788","https://openalex.org/W2587188779","https://openalex.org/W3132870970","https://openalex.org/W4385804830","https://openalex.org/W2943088381","https://openalex.org/W2074021203","https://openalex.org/W2144348063"],"abstract_inverted_index":{"During":[0],"data":[1,22,101],"deduplication,":[2,64],"on-disk":[3,31],"fingerprint":[4,60,87],"lookups":[5,32],"lead":[6],"to":[7,36,48,75],"high":[8],"disk":[9,39],"traffic,":[10],"resulting":[11],"in":[12,33,43,73],"a":[13,20,67],"bottleneck.":[14,40],"In":[15,41],"this":[16],"paper,":[17],"we":[18,65],"propose":[19],"\u201clazy\u201d":[21],"deduplication":[23,42],"method":[24,85,97],"which":[25],"buffers":[26],"incoming":[27,59],"fingerprints":[28],"and":[29],"performs":[30],"batches,":[34],"aiming":[35],"reduce":[37],"the":[38,50,58,83,99],"general,":[44],"prefetching":[45],"is":[46],"used":[47],"improve":[49],"cache":[51],"hit":[52],"rate":[53],"by":[54,90],"exploiting":[55],"locality":[56,72],"within":[57],"stream.":[61],"For":[62],"lazy":[63,84],"design":[66],"buffering":[68],"strategy":[69],"that":[70,82],"preserves":[71],"order":[74],"similarly":[76],"facilitate":[77],"prefetching.":[78],"Experimental":[79],"results":[80],"indicate":[81],"improves":[86],"identification":[88],"performance":[89],"over":[91],"50%":[92],"compared":[93],"with":[94,98],"an":[95],"\u201ceager\u201d":[96],"same":[100],"layout.":[102]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
