{"id":"https://openalex.org/W4360765171","doi":"https://doi.org/10.1109/asonam55673.2022.10068661","title":"Whole-File Chunk-Based Deduplication Using Reinforcement Learning for Cloud Storage","display_name":"Whole-File Chunk-Based Deduplication Using Reinforcement Learning for Cloud Storage","publication_year":2022,"publication_date":"2022-11-10","ids":{"openalex":"https://openalex.org/W4360765171","doi":"https://doi.org/10.1109/asonam55673.2022.10068661"},"language":"en","primary_location":{"id":"doi:10.1109/asonam55673.2022.10068661","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/asonam55673.2022.10068661","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084058122","display_name":"Xincheng Yuan","orcid":"https://orcid.org/0000-0002-8741-4075"},"institutions":[{"id":"https://openalex.org/I51504820","display_name":"San Jose State University","ror":"https://ror.org/04qyvz380","country_code":"US","type":"education","lineage":["https://openalex.org/I51504820"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xincheng Yuan","raw_affiliation_strings":["San Jos&#x00E9; State University,Department of Computer Science,San Jos&#x00E9;,CA,USA"],"affiliations":[{"raw_affiliation_string":"San Jos&#x00E9; State University,Department of Computer Science,San Jos&#x00E9;,CA,USA","institution_ids":["https://openalex.org/I51504820"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021884742","display_name":"Melody Moh","orcid":"https://orcid.org/0000-0002-8313-6645"},"institutions":[{"id":"https://openalex.org/I51504820","display_name":"San Jose State University","ror":"https://ror.org/04qyvz380","country_code":"US","type":"education","lineage":["https://openalex.org/I51504820"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Melody Moh","raw_affiliation_strings":["San Jos&#x00E9; State University,Department of Computer Science,San Jos&#x00E9;,CA,USA"],"affiliations":[{"raw_affiliation_string":"San Jos&#x00E9; State University,Department of Computer Science,San Jos&#x00E9;,CA,USA","institution_ids":["https://openalex.org/I51504820"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019298277","display_name":"Teng-Sheng Moh","orcid":"https://orcid.org/0000-0002-2726-102X"},"institutions":[{"id":"https://openalex.org/I51504820","display_name":"San Jose State University","ror":"https://ror.org/04qyvz380","country_code":"US","type":"education","lineage":["https://openalex.org/I51504820"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Teng-Sheng Moh","raw_affiliation_strings":["San Jos&#x00E9; State University,Department of Computer Science,San Jos&#x00E9;,CA,USA"],"affiliations":[{"raw_affiliation_string":"San Jos&#x00E9; State University,Department of Computer Science,San Jos&#x00E9;,CA,USA","institution_ids":["https://openalex.org/I51504820"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5084058122"],"corresponding_institution_ids":["https://openalex.org/I51504820"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20995921,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"269","last_page":"276"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.9727540016174316},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8694598078727722},{"id":"https://openalex.org/keywords/backup","display_name":"Backup","score":0.7878068685531616},{"id":"https://openalex.org/keywords/cloud-storage","display_name":"Cloud storage","score":0.5893170833587646},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.5255652666091919},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.5126177668571472},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.4986388683319092},{"id":"https://openalex.org/keywords/computer-data-storage","display_name":"Computer data storage","score":0.4732169210910797},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3500218391418457},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2827186584472656},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.08519747853279114}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.9727540016174316},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8694598078727722},{"id":"https://openalex.org/C2780945871","wikidata":"https://www.wikidata.org/wiki/Q194274","display_name":"Backup","level":2,"score":0.7878068685531616},{"id":"https://openalex.org/C2777059624","wikidata":"https://www.wikidata.org/wiki/Q914359","display_name":"Cloud storage","level":3,"score":0.5893170833587646},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.5255652666091919},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5126177668571472},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.4986388683319092},{"id":"https://openalex.org/C194739806","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Computer data storage","level":2,"score":0.4732169210910797},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3500218391418457},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2827186584472656},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.08519747853279114}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asonam55673.2022.10068661","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/asonam55673.2022.10068661","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1969126835","https://openalex.org/W1993284846","https://openalex.org/W2799944323","https://openalex.org/W2972359092","https://openalex.org/W2982562745","https://openalex.org/W4298857966","https://openalex.org/W6637967152","https://openalex.org/W6691284710"],"related_works":["https://openalex.org/W4285245558","https://openalex.org/W1437397736","https://openalex.org/W2048606985","https://openalex.org/W2375056709","https://openalex.org/W4301155776","https://openalex.org/W2012359782","https://openalex.org/W2952411620","https://openalex.org/W2348132657","https://openalex.org/W1588783802","https://openalex.org/W1892538828"],"abstract_inverted_index":{"Deduplication":[0,52],"is":[1,22,43,53,126],"the":[2,103],"process":[3],"of":[4,27,105],"removing":[5],"replicated":[6],"data":[7,28,33,46,94],"content":[8],"from":[9],"storage":[10,37,47,66],"facilities":[11],"like":[12,98],"online":[13],"databases,":[14],"cloud":[15,50],"datastore,":[16],"local":[17],"file":[18,56,108],"systems,":[19],"etc.":[20],"It":[21],"commonly":[23],"performed":[24],"as":[25,75],"part":[26],"preprocessing":[29],"to":[30,117],"eliminate":[31],"redundant":[32],"that":[34],"requires":[35],"extra":[36],"spaces":[38],"and":[39,42],"computing":[40],"power":[41],"crucial":[44],"for":[45,55],"management":[48],"in":[49,80],"computing.":[51],"essential":[54],"backup":[57,72],"systems":[58],"since":[59],"duplicated":[60],"files":[61,85],"will":[62],"presumably":[63],"consume":[64],"more":[65],"space,":[67],"especially":[68],"with":[69],"a":[70,119,138],"short":[71],"period":[73],"such":[74,107],"daily.":[76],"A":[77],"common":[78],"technique":[79],"this":[81],"field":[82],"involves":[83],"splitting":[84],"into":[86],"chunks":[87],"whose":[88],"hashes":[89],"can":[90],"be":[91],"compared":[92],"using":[93],"structures":[95],"or":[96],"techniques":[97],"clustering.":[99],"This":[100],"paper":[101],"explores":[102],"possibility":[104],"performing":[106],"chunk":[109],"deduplication":[110,121,133,141],"leveraging":[111],"an":[112],"innovative":[113],"reinforcement":[114],"learning":[115],"approach":[116],"achieve":[118],"high":[120],"ratio.":[122],"The":[123],"proposed":[124],"system":[125],"named":[127],"SegDup,":[128],"which":[129],"achieves":[130],"13%":[131],"higher":[132],"ratio":[134],"than":[135],"Extreme":[136],"Binning,":[137],"state-of-the":[139],"art":[140],"algorithm.":[142]},"counts_by_year":[],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
