{"id":"https://openalex.org/W4321089583","doi":"https://doi.org/10.1145/3584663","title":"The Design of Fast and Lightweight Resemblance Detection for Efficient Post-Deduplication Delta Compression","display_name":"The Design of Fast and Lightweight Resemblance Detection for Efficient Post-Deduplication Delta Compression","publication_year":2023,"publication_date":"2023-02-16","ids":{"openalex":"https://openalex.org/W4321089583","doi":"https://doi.org/10.1145/3584663"},"language":"en","primary_location":{"id":"doi:10.1145/3584663","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3584663","pdf_url":null,"source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050928208","display_name":"Wen Xia","orcid":"https://orcid.org/0000-0003-4093-6391"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Xia","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen; Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","Harbin Institute of Technology, Shenzhen"],"raw_orcid":"https://orcid.org/0000-0003-4093-6391","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen; Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090768106","display_name":"Lifeng Pu","orcid":"https://orcid.org/0000-0002-9117-2590"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lifeng Pu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-9117-2590","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021153181","display_name":"Xiangyu Zou","orcid":"https://orcid.org/0000-0001-5104-8301"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Zou","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-5104-8301","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091766663","display_name":"Philip Shilane","orcid":"https://orcid.org/0000-0003-1235-0502"},"institutions":[{"id":"https://openalex.org/I4210149435","display_name":"DELL (United States)","ror":"https://ror.org/05rejmm18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210149435"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Philip Shilane","raw_affiliation_strings":["Dell Technologies, USA"],"raw_orcid":"https://orcid.org/0000-0003-1235-0502","affiliations":[{"raw_affiliation_string":"Dell Technologies, USA","institution_ids":["https://openalex.org/I4210149435"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100626061","display_name":"Shiyi Li","orcid":"https://orcid.org/0000-0001-8206-6916"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiyi Li","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-8206-6916","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100458457","display_name":"Haijun Zhang","orcid":"https://orcid.org/0000-0002-1648-0227"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haijun Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-1648-0227","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100328995","display_name":"Xuan Wang","orcid":"https://orcid.org/0000-0002-3512-0649"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuan Wang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen; Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","Harbin Institute of Technology, Shenzhen"],"raw_orcid":"https://orcid.org/0000-0002-3512-0649","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen; Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":18.2071,"has_fulltext":false,"cited_by_count":96,"citation_normalized_percentile":{"value":0.99572827,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"19","issue":"3","first_page":"1","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.842228889465332},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.7328770160675049},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.7211582660675049},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5779911279678345},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5545098185539246},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5413825511932373},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5224934220314026},{"id":"https://openalex.org/keywords/byte","display_name":"Byte","score":0.520442545413971},{"id":"https://openalex.org/keywords/double-hashing","display_name":"Double hashing","score":0.47946488857269287},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.4655269980430603},{"id":"https://openalex.org/keywords/hash-tree","display_name":"Hash tree","score":0.4601154923439026},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.45852935314178467},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.44215163588523865},{"id":"https://openalex.org/keywords/rolling-hash","display_name":"Rolling hash","score":0.43409132957458496},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.426718145608902},{"id":"https://openalex.org/keywords/hash-table","display_name":"Hash table","score":0.4211944341659546},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.19765052199363708},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.1536431610584259},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08423355221748352},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.08168011903762817}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.842228889465332},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.7328770160675049},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.7211582660675049},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5779911279678345},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5545098185539246},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5413825511932373},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5224934220314026},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.520442545413971},{"id":"https://openalex.org/C138111711","wikidata":"https://www.wikidata.org/wiki/Q478351","display_name":"Double hashing","level":4,"score":0.47946488857269287},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.4655269980430603},{"id":"https://openalex.org/C27353603","wikidata":"https://www.wikidata.org/wiki/Q16948622","display_name":"Hash tree","level":4,"score":0.4601154923439026},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.45852935314178467},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.44215163588523865},{"id":"https://openalex.org/C108546238","wikidata":"https://www.wikidata.org/wiki/Q4228982","display_name":"Rolling hash","level":5,"score":0.43409132957458496},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.426718145608902},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.4211944341659546},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.19765052199363708},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.1536431610584259},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08423355221748352},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.08168011903762817},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3584663","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3584663","pdf_url":null,"source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2461541934","display_name":null,"funder_award_id":"RCYX20210609104510007, JCYJ20210324131203009, JCYJ20200109113427092, GXWD20201230155427003-20200821172511002","funder_id":"https://openalex.org/F4320336569","funder_display_name":"Shenzhen Science and Technology Innovation Program"},{"id":"https://openalex.org/G4107755031","display_name":null,"funder_award_id":"2021B1515020088","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G7536437408","display_name":null,"funder_award_id":"61972441, 61972112, 61832004","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320336569","display_name":"Shenzhen Science and Technology Innovation Program","ror":null},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W22807665","https://openalex.org/W1437397736","https://openalex.org/W1947728686","https://openalex.org/W1969335064","https://openalex.org/W1991750682","https://openalex.org/W2060108852","https://openalex.org/W2073370301","https://openalex.org/W2081193615","https://openalex.org/W2099886759","https://openalex.org/W2107745473","https://openalex.org/W2122962290","https://openalex.org/W2123767107","https://openalex.org/W2148885851","https://openalex.org/W2346147088","https://openalex.org/W2475932436","https://openalex.org/W2751343396","https://openalex.org/W2976670629","https://openalex.org/W2986445348","https://openalex.org/W2998583855","https://openalex.org/W4255088304","https://openalex.org/W4255553477"],"related_works":["https://openalex.org/W2069568684","https://openalex.org/W1536525740","https://openalex.org/W4285028289","https://openalex.org/W113683524","https://openalex.org/W2765699882","https://openalex.org/W1996864362","https://openalex.org/W2332259278","https://openalex.org/W2347294433","https://openalex.org/W2154676777","https://openalex.org/W2376661060"],"abstract_inverted_index":{"Post-deduplication":[0],"delta":[1,50],"compression":[2,31,51,121,262,270],"is":[3,24,212,244],"a":[4,28,97,119,126,160,167],"data":[5,76,236],"reduction":[6,237],"technique":[7],"that":[8,104,202],"calculates":[9],"and":[10,78,99,118,179,214,221,247,253,266],"stores":[11],"the":[12,34,47,69,86,107,151,155,174,193,197,204,209,218,240,251,260],"differences":[13],"of":[14,37,49,64,85,146,154,196,206,226,264],"very":[15,29],"similar":[16],"but":[17],"non-duplicate":[18],"chunks":[19,77],"in":[20],"storage":[21,238],"systems,":[22],"which":[23],"able":[25],"to":[26,54,143,150,165,192],"achieve":[27],"high":[30,56,115,120,261],"ratio.":[32,122],"However,":[33],"low":[35],"throughput":[36,243],"widely":[38],"used":[39],"resemblance":[40,101,111,189,207],"detection":[41,102,112,116,190],"approaches":[42],"(e.g.,":[43],"N-Transform)":[44],"usually":[45],"becomes":[46],"bottleneck":[48],"systems":[52],"due":[53],"introducing":[55],"computational":[57,108],"overhead.":[58],"Generally,":[59],"this":[60,92,184],"overhead":[61,109],"mainly":[62],"consists":[63],"two":[65],"parts:":[66],"\u2460":[67],"calculating":[68],"rolling":[70,88,147,176],"hash":[71,89,132,171,177,186],"byte":[72,74],"by":[73],"across":[75],"\u2461":[79],"applying":[80],"multiple":[81],"transforms":[82,182],"on":[83,183],"all":[84],"calculated":[87],"values.":[90],"In":[91],"article,":[93],"we":[94],"propose":[95],"Odess,":[96],"fast":[98],"lightweight":[100],"approach,":[103],"greatly":[105],"reduces":[106],"for":[110,188],"while":[113,258],"achieving":[114,267],"accuracy":[117],"Odess":[123,157,210],"first":[124,152],"utilizes":[125],"novel":[127,161],"Subwindow-based":[128],"Parallel":[129],"Rolling":[130],"(SWPR)":[131],"method":[133,164],"using":[134],"Single":[135],"Instruction":[136],"Multiple":[137],"Data":[138],"[":[139,228],"1":[140],"]":[141],"(SIMD)":[142],"accelerate":[144],"calculation":[145],"hashes":[148],"(corresponding":[149,191],"part":[153,195],"overhead).":[156,198],"then":[158],"uses":[159],"Content-Defined":[162],"Sampling":[163],"generate":[166],"much":[168],"smaller":[169],"proxy":[170],"set":[172,178,187],"from":[173],"whole":[175],"quickly":[180],"applies":[181],"small":[185],"second":[194],"Evaluation":[199],"results":[200],"show":[201],"during":[203],"stage":[205],"detection,":[208],"approach":[211],"\u223c31.4\u00d7":[213],"\u223c7.9\u00d7":[215],"faster":[216],"than":[217,250],"state-of-the-art":[219],"N-Transform":[220,227,265],"Finesse":[222],"(a":[223],"recent":[224],"variant":[225],"39":[229],"]),":[230],"respectively.":[231],"When":[232],"considering":[233],"an":[234],"end-to-end":[235],"system,":[239],"Odess-based":[241],"system\u2019s":[242],"about":[245],"3.20\u00d7":[246],"1.41\u00d7":[248],"higher":[249,269],"N-Transform-":[252],"Finesse-based":[254],"systems\u2019":[255],"throughput,":[256],"respectively,":[257],"maintaining":[259],"ratio":[263,271],"\u223c1.22\u00d7":[268],"over":[272],"Finesse.":[273]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":62},{"year":2024,"cited_by_count":26}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
