{"id":"https://openalex.org/W4412393310","doi":"https://doi.org/10.1145/3747839","title":"Argus: A Precise and Efficient Resemblance Detection for Post-Deduplication Delta Compression","display_name":"Argus: A Precise and Efficient Resemblance Detection for Post-Deduplication Delta Compression","publication_year":2025,"publication_date":"2025-07-14","ids":{"openalex":"https://openalex.org/W4412393310","doi":"https://doi.org/10.1145/3747839"},"language":"en","primary_location":{"id":"doi:10.1145/3747839","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3747839","pdf_url":null,"source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3747839","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Han Xu","orcid":"https://orcid.org/0009-0007-5358-2082"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Han Xu","raw_affiliation_strings":["Harbin Institute of Technology Shenzhen","Harbin Institute of Technology Shenzhen, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0007-5358-2082","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology Shenzhen","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021153181","display_name":"Xiangyu Zou","orcid":"https://orcid.org/0000-0001-5104-8301"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Zou","raw_affiliation_strings":["Harbin Institute of Technology Shenzhen","Harbin Institute of Technology Shenzhen, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-5104-8301","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology Shenzhen","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103401276","display_name":"Yunsheng Dong","orcid":"https://orcid.org/0009-0000-5004-013X"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunsheng Dong","raw_affiliation_strings":["Harbin Institute of Technology Shenzhen","Harbin Institute of Technology Shenzhen, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0000-5004-013X","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology Shenzhen","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091766663","display_name":"Philip Shilane","orcid":"https://orcid.org/0000-0003-1235-0502"},"institutions":[{"id":"https://openalex.org/I4210149435","display_name":"DELL (United States)","ror":"https://ror.org/05rejmm18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210149435"]},{"id":"https://openalex.org/I4210163742","display_name":"Newtown Hospital","ror":"https://ror.org/04sahz302","country_code":"GB","type":"healthcare","lineage":["https://openalex.org/I4210163742"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Philip Shilane","raw_affiliation_strings":["Dell Technologies","Dell Technologies, Newtown, United States"],"raw_orcid":"https://orcid.org/0000-0003-1235-0502","affiliations":[{"raw_affiliation_string":"Dell Technologies","institution_ids":["https://openalex.org/I4210149435"]},{"raw_affiliation_string":"Dell Technologies, Newtown, United States","institution_ids":["https://openalex.org/I4210163742"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059296415","display_name":"Yanqi Pan","orcid":"https://orcid.org/0009-0007-7832-0599"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanqi Pan","raw_affiliation_strings":["Harbin Institute of Technology Shenzhen","Harbin Institute of Technology Shenzhen, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0007-7832-0599","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology Shenzhen","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101656886","display_name":"Cai Deng","orcid":"https://orcid.org/0000-0002-7717-6990"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cai Deng","raw_affiliation_strings":["Harbin Institute of Technology Shenzhen","Harbin Institute of Technology Shenzhen, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-7717-6990","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology Shenzhen","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050928208","display_name":"Wen Xia","orcid":"https://orcid.org/0000-0003-4093-6391"},"institutions":[{"id":"https://openalex.org/I158809036","display_name":"Shenzhen Institute of Information Technology","ror":"https://ror.org/03wrf9427","country_code":"CN","type":"education","lineage":["https://openalex.org/I158809036"]},{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Xia","raw_affiliation_strings":["School of Computer Science, Harbin Institute of Technology Shenzhen","School of Computer Science, Harbin Institute of Technology Shenzhen, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-4093-6391","affiliations":[{"raw_affiliation_string":"School of Computer Science, Harbin Institute of Technology Shenzhen","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"School of Computer Science, Harbin Institute of Technology Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I158809036","https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07504559,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"22","issue":"1","first_page":"1","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9753999710083008,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9753999710083008,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9621000289916992,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.909500002861023,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.8320339918136597},{"id":"https://openalex.org/keywords/argus","display_name":"Argus","score":0.8257502317428589},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8234228491783142},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3064500391483307},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07186558842658997}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.8320339918136597},{"id":"https://openalex.org/C2776760603","wikidata":"https://www.wikidata.org/wiki/Q3622471","display_name":"Argus","level":2,"score":0.8257502317428589},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8234228491783142},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3064500391483307},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07186558842658997}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3747839","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3747839","pdf_url":null,"source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3747839","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3747839","pdf_url":null,"source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4100129303","display_name":null,"funder_award_id":"62472127","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5449262284","display_name":null,"funder_award_id":"2023A1515110072","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1913628733","https://openalex.org/W1969335064","https://openalex.org/W1991750682","https://openalex.org/W2060108852","https://openalex.org/W2073370301","https://openalex.org/W2077139964","https://openalex.org/W2110423232","https://openalex.org/W2122962290","https://openalex.org/W2162752393","https://openalex.org/W2475932436","https://openalex.org/W2492590231","https://openalex.org/W2734941459","https://openalex.org/W2998583855","https://openalex.org/W4220848902","https://openalex.org/W4289533983","https://openalex.org/W4312429406","https://openalex.org/W4401070642","https://openalex.org/W4405022616"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2297821122","https://openalex.org/W4249800674","https://openalex.org/W4255014369","https://openalex.org/W1583120979","https://openalex.org/W2005545852","https://openalex.org/W2087282387","https://openalex.org/W4410300319"],"abstract_inverted_index":{"For":[0],"data":[1,31,44,103],"reduction":[2,45],"techniques":[3,82],"used":[4],"in":[5,167,192],"storage":[6],"systems,":[7],"delta":[8,239],"compression":[9,23,196,240],"is":[10],"often":[11],"implemented":[12],"after":[13],"deduplication,":[14],"having":[15],"been":[16],"shown":[17],"to":[18,83,109,159,220,227,245],"achieve":[19,187],"a":[20,72,91,155,193],"much":[21,194],"higher":[22,195,238],"ratio":[24,46,197,241],"by":[25,213],"efficiently":[26],"detecting":[27],"and":[28,43,74,117,135,154,176,231,236,242,252],"compressing":[29],"similar":[30,180],"chunks.":[32],"Unfortunately,":[33],"existing":[34],"resemblance":[35,64,76,207],"detection":[36,77,133,189],"approaches":[37],"cannot":[38],"maintain":[39],"both":[40,130],"high":[41,132],"throughput":[42],"simultaneously":[47],"since":[48],"they":[49],"either":[50],"introduce":[51],"heavy":[52],"calculation":[53],"overhead":[54,205],"or":[55],"generate":[56],"useless":[57],"features":[58,119,126],"that":[59],"reduce":[60],"the":[61,97,110,114,143,161,164,168,171,203,221],"accuracy":[62,134],"of":[63,101,113,163,173],"detection.":[65,208],"In":[66],"this":[67],"article,":[68],"we":[69],"propose":[70],"Argus,":[71],"fast":[73],"precise":[75],"approach":[78],"using":[79],"two":[80],"primary":[81],"improve":[84],"data-reduction":[85],"efficiency":[86],"significantly.":[87],"First,":[88],"Argus":[89,124,148,185,224],"utilizes":[90,149],"Bin-Wise":[92,145],"Partitioning":[93,146],"strategy,":[94,147],"which":[95,129],"separates":[96],"rolling":[98,152],"hash":[99,115,153],"values":[100],"each":[102],"chunk":[104],"into":[105],"different":[106],"subsets":[107],"according":[108],"suffix":[111],"bits":[112],"value":[116],"generates":[118,125],"from":[120],"these":[121],"subsets.":[122],"Thus,":[123],"more":[127],"efficiently,":[128],"achieves":[131,225,243],"improves":[136],"feature":[137,174,257],"generation":[138,258],"speed.":[139,259],"Second,":[140],"based":[141],"on":[142],"efficient":[144],"fine-grained":[150],"Gear":[151],"Plain":[156],"Feature":[157],"strategy":[158],"manage":[160],"granularity":[162],"content":[165],"represented":[166],"feature,":[169],"increasing":[170],"probability":[172],"matching":[175],"catching":[177],"as":[178,182],"many":[179],"chunks":[181],"possible.":[183],"Consequently,":[184],"can":[186],"better":[188],"accuracy,":[190],"resulting":[191],"than":[198],"previous":[199],"works":[200],"while":[201],"minimizing":[202],"computational":[204],"for":[206],"Our":[209],"evaluation":[210],"results":[211],"driven":[212],"several":[214],"real-world":[215],"datasets":[216],"suggest":[217],"that,":[218],"compared":[219],"state-of-the-art":[222],"approaches,":[223],"up":[226,244],"1.64":[228],"\u00d7":[229,233,247,250,254],"(DeepSketch)":[230],"2.29":[232],"(Finesse,":[234],"Odess,":[235],"N-Transform)":[237],"19.9":[246],"(N-Transform),":[248],"5.57":[249],"(Finesse),":[251],"1.18":[253],"(Odess)":[255],"faster":[256]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
