{"id":"https://openalex.org/W4281673957","doi":"https://doi.org/10.1145/3507921","title":"From Hyper-dimensional Structures to Linear Structures: Maintaining Deduplicated Data\u2019s Locality","display_name":"From Hyper-dimensional Structures to Linear Structures: Maintaining Deduplicated Data\u2019s Locality","publication_year":2022,"publication_date":"2022-06-02","ids":{"openalex":"https://openalex.org/W4281673957","doi":"https://doi.org/10.1145/3507921"},"language":"en","primary_location":{"id":"doi:10.1145/3507921","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3507921","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3507921","source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3507921","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021153181","display_name":"Xiangyu Zou","orcid":"https://orcid.org/0000-0001-5104-8301"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Zou","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-5104-8301","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033315651","display_name":"Jingsong Yuan","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingsong Yuan","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091766663","display_name":"Philip Shilane","orcid":"https://orcid.org/0000-0003-1235-0502"},"institutions":[{"id":"https://openalex.org/I4210149435","display_name":"DELL (United States)","ror":"https://ror.org/05rejmm18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210149435"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Philip Shilane","raw_affiliation_strings":["Dell Technologies, Newtown, PA, USA"],"raw_orcid":"https://orcid.org/0000-0003-1235-0502","affiliations":[{"raw_affiliation_string":"Dell Technologies, Newtown, PA, USA","institution_ids":["https://openalex.org/I4210149435"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050928208","display_name":"Wen Xia","orcid":"https://orcid.org/0000-0003-4093-6391"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210138186","display_name":"Wuhan National Laboratory for Optoelectronics","ror":"https://ror.org/03c9ncn37","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210138186"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Xia","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China and Wuhan National Laboratory for Optoelectronics, HUST, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China and Wuhan National Laboratory for Optoelectronics, HUST, Shenzhen, China","institution_ids":["https://openalex.org/I4210138186","https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100458457","display_name":"Haijun Zhang","orcid":"https://orcid.org/0000-0002-1648-0227"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haijun Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100328995","display_name":"Xuan Wang","orcid":"https://orcid.org/0000-0002-3512-0649"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuan Wang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":30.2731,"has_fulltext":true,"cited_by_count":104,"citation_normalized_percentile":{"value":0.9972884,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"18","issue":"3","first_page":"1","last_page":"28"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9860000014305115,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8863509297370911},{"id":"https://openalex.org/keywords/backup","display_name":"Backup","score":0.8373613953590393},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.7767125964164734},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.759299635887146},{"id":"https://openalex.org/keywords/garbage-collection","display_name":"Garbage collection","score":0.5127056837081909},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.504658579826355},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.45980751514434814},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4416457414627075},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.39600706100463867},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2192753255367279},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.19617107510566711}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8863509297370911},{"id":"https://openalex.org/C2780945871","wikidata":"https://www.wikidata.org/wiki/Q194274","display_name":"Backup","level":2,"score":0.8373613953590393},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.7767125964164734},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.759299635887146},{"id":"https://openalex.org/C105122174","wikidata":"https://www.wikidata.org/wiki/Q322202","display_name":"Garbage collection","level":3,"score":0.5127056837081909},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.504658579826355},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.45980751514434814},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4416457414627075},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.39600706100463867},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2192753255367279},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.19617107510566711},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C75403996","wikidata":"https://www.wikidata.org/wiki/Q5521979","display_name":"Garbage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3507921","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3507921","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3507921","source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3507921","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3507921","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3507921","source":{"id":"https://openalex.org/S158124317","display_name":"ACM Transactions on Storage","issn_l":"1553-3077","issn":["1553-3077","1553-3093"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Storage","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2280393401","display_name":null,"funder_award_id":"2018WNLOKF008","funder_id":"https://openalex.org/F4320325104","funder_display_name":"Wuhan National Laboratory for Optoelectronics"},{"id":"https://openalex.org/G3024757080","display_name":null,"funder_award_id":"2021A1515012634, 2021B1515020088","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G31233193","display_name":"\u5927\u6570\u636e\u73af\u5883\u4e0b\u7684\u5927\u670d\u52a1\u7406\u8bba\u4e0e\u65b9\u6cd5\u7814\u7a76","funder_award_id":"61832004","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4107755031","display_name":null,"funder_award_id":"2021B1515020088","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G5379840081","display_name":"\u9762\u5411\u76f8\u4f3c\u6570\u636e\u6d88\u5197\u7684\u6570\u636e\u5b58\u50a8\u7ec4\u7ec7\u53ca\u5173\u952e\u6280\u672f\u7814\u7a76","funder_award_id":"61972441","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5840073679","display_name":"\u5185\u5bb9\u611f\u77e5\u7684\u89c6\u94fe\u5b66\u4e60\u53ca\u4f18\u5316\u5173\u952e\u6280\u672f\u7814\u7a76","funder_award_id":"61972112","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6593991010","display_name":null,"funder_award_id":"61972441, 61972112, and 61832004","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320325104","display_name":"Wuhan National Laboratory for Optoelectronics","ror":"https://ror.org/03c9ncn37"},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4281673957.pdf","grobid_xml":"https://content.openalex.org/works/W4281673957.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W5335935","https://openalex.org/W22807665","https://openalex.org/W120798314","https://openalex.org/W178328500","https://openalex.org/W1437397736","https://openalex.org/W1553098517","https://openalex.org/W1969126835","https://openalex.org/W1971212200","https://openalex.org/W1979162731","https://openalex.org/W1997349095","https://openalex.org/W1998817683","https://openalex.org/W2010150514","https://openalex.org/W2012359782","https://openalex.org/W2027016098","https://openalex.org/W2033062224","https://openalex.org/W2056980397","https://openalex.org/W2107551255","https://openalex.org/W2110322986","https://openalex.org/W2156719566","https://openalex.org/W2172174733","https://openalex.org/W2475932436","https://openalex.org/W2605250808","https://openalex.org/W2751343396","https://openalex.org/W2810972659","https://openalex.org/W2918722987","https://openalex.org/W2986445348","https://openalex.org/W3004612141","https://openalex.org/W4301155776","https://openalex.org/W6999478085"],"related_works":["https://openalex.org/W2955195711","https://openalex.org/W1437397736","https://openalex.org/W4285245558","https://openalex.org/W2048606985","https://openalex.org/W2375056709","https://openalex.org/W2012359782","https://openalex.org/W2952411620","https://openalex.org/W4301155776","https://openalex.org/W2348132657","https://openalex.org/W2179326652"],"abstract_inverted_index":{"Data":[0],"deduplication":[1,129,208],"is":[2,108,238],"widely":[3],"used":[4],"to":[5,25,53,67,82,96,103,213,222,226],"reduce":[6],"the":[7,15,27,33,36,43,89,98,227,232,249,260,265],"size":[8],"of":[9,18,39,46,112,267],"backup":[10,198,257],"workloads,":[11],"but":[12,79],"it":[13,237],"has":[14,63],"known":[16],"disadvantage":[17],"causing":[19],"poor":[20,54],"data":[21,41,74,102,121,136,229],"locality,":[22],"also":[23],"referred":[24],"as":[26,141,143],"fragmentation":[28,80],"problem.":[29],"This":[30],"results":[31,195],"from":[32],"gap":[34],"between":[35],"hyper-dimensional":[37,99],"structure":[38],"deduplicated":[40,101],"and":[42,50,56,85,116,138,156,176,216],"sequential":[44,182],"nature":[45],"many":[47],"storage":[48],"devices,":[49],"this":[51,117],"leads":[52],"restore":[55,84,217],"garbage":[57],"collection":[58],"(GC)":[59],"performance.":[60,87],"Current":[61],"research":[62],"considered":[64],"writing":[65],"duplicates":[66],"maintain":[68],"locality":[69,90,140],"(e.g.,":[70],"rewriting)":[71],"or":[72,77],"caching":[73],"in":[75,151],"memory":[76],"SSD,":[78],"continues":[81],"lower":[83],"GC":[86,246],"Investigating":[88],"issue,":[91],"we":[92,124,146],"design":[93],"a":[94,104,126,166,180,243],"method":[95],"flatten":[97],"structured":[100],"one-dimensional":[105],"format,":[106],"which":[107,184],"based":[109],"on":[110],"classification":[111],"each":[113],"chunk\u2019s":[114],"lifecycle,":[115],"creates":[118],"our":[119,135],"proposed":[120],"layout.":[122,230],"Furthermore,":[123],"present":[125],"novel":[127],"management-friendly":[128],"framework,":[130],"called":[131],"MFDedup,":[132],"that":[133,210,219],"applies":[134],"layout":[137],"maintains":[139],"much":[142],"possible.":[144],"Specifically,":[145],"use":[147],"two":[148,256],"key":[149],"techniques":[150],"MFDedup:":[152],"Neighbor-duplicate-focus":[153],"indexing":[154],"(NDF)":[155],"Across-version-aware":[157],"Reorganization":[158],"scheme":[159],"(AVAR).":[160],"NDF":[161,250],"performs":[162],"duplicate":[163],"detection":[164],"against":[165],"previous":[167],"backup,":[168],"then":[169],"AVAR":[170],"rearranges":[171],"chunks":[172],"with":[173,196,203,264],"an":[174],"offline":[175],"iterative":[177],"algorithm":[178],"into":[179],"compact,":[181],"layout,":[183],"nearly":[185],"eliminates":[186],"random":[187],"I/O":[188],"during":[189],"file":[190],"restores":[191],"after":[192],"deduplication.":[193],"Evaluation":[194],"five":[197],"datasets":[199],"demonstrate":[200],"that,":[201],"compared":[202],"state-of-the-art":[204],"techniques,":[205],"MFDedup":[206],"achieves":[207],"ratios":[209],"are":[211,220],"1.12\u00d7":[212],"2.19\u00d7":[214],"higher":[215],"throughputs":[218],"1.92\u00d7":[221],"10.02\u00d7":[223],"faster":[224],"due":[225],"improved":[228],"While":[231],"rearranging":[233],"stage":[234],"introduces":[235],"overheads,":[236],"more":[239],"than":[240],"offset":[241],"by":[242],"nearly-zero":[244],"overhead":[245],"process.":[247],"Moreover,":[248],"index":[251,262],"only":[252],"requires":[253],"indices":[254],"for":[255],"versions,":[258],"while":[259],"traditional":[261],"grows":[263],"number":[266],"versions":[268],"retained.":[269]},"counts_by_year":[{"year":2026,"cited_by_count":9},{"year":2025,"cited_by_count":63},{"year":2024,"cited_by_count":32}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
