{"id":"https://openalex.org/W4382724591","doi":"https://doi.org/10.1109/tpds.2023.3290770","title":"Accelerating Content-Defined Chunking for Data Deduplication Based on Speculative Jump","display_name":"Accelerating Content-Defined Chunking for Data Deduplication Based on Speculative Jump","publication_year":2023,"publication_date":"2023-06-29","ids":{"openalex":"https://openalex.org/W4382724591","doi":"https://doi.org/10.1109/tpds.2023.3290770"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2023.3290770","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tpds.2023.3290770","pdf_url":"https://ieeexplore.ieee.org/ielx7/71/10180399/10168293.pdf","source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ieeexplore.ieee.org/ielx7/71/10180399/10168293.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101436007","display_name":"Xiaozhong Jin","orcid":"https://orcid.org/0009-0008-0927-4274"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaozhong Jin","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033955608","display_name":"Haikun Liu","orcid":"https://orcid.org/0000-0003-4290-1408"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haikun Liu","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102871859","display_name":"Chencheng Ye","orcid":"https://orcid.org/0000-0003-3432-855X"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chencheng Ye","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022398389","display_name":"Xiaofei Liao","orcid":"https://orcid.org/0000-0001-6302-813X"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofei Liao","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022262922","display_name":"Hai Jin","orcid":"https://orcid.org/0000-0002-3934-7605"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai Jin","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100433508","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0002-2052-2231"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, and Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101436007"],"corresponding_institution_ids":["https://openalex.org/I47720641"],"apc_list":null,"apc_paid":null,"fwci":4.53,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.95005533,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"34","issue":"9","first_page":"2568","last_page":"2579"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.9156742691993713},{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.8436230421066284},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8363171219825745},{"id":"https://openalex.org/keywords/byte","display_name":"Byte","score":0.8140513896942139},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.7336249947547913},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.6133959889411926},{"id":"https://openalex.org/keywords/sliding-window-protocol","display_name":"Sliding window protocol","score":0.48981210589408875},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.45450928807258606},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34998711943626404},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3320155143737793},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2777078151702881},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21060135960578918},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1626150906085968},{"id":"https://openalex.org/keywords/window","display_name":"Window (computing)","score":0.12920257449150085},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08597445487976074}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.9156742691993713},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.8436230421066284},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8363171219825745},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.8140513896942139},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.7336249947547913},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.6133959889411926},{"id":"https://openalex.org/C102392041","wikidata":"https://www.wikidata.org/wiki/Q592860","display_name":"Sliding window protocol","level":3,"score":0.48981210589408875},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.45450928807258606},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34998711943626404},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3320155143737793},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2777078151702881},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21060135960578918},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1626150906085968},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.12920257449150085},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08597445487976074},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2023.3290770","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tpds.2023.3290770","pdf_url":"https://ieeexplore.ieee.org/ielx7/71/10180399/10168293.pdf","source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/tpds.2023.3290770","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tpds.2023.3290770","pdf_url":"https://ieeexplore.ieee.org/ielx7/71/10180399/10168293.pdf","source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2326962506","display_name":null,"funder_award_id":"61929103","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4565136922","display_name":null,"funder_award_id":"61825202","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5387719375","display_name":null,"funder_award_id":"2022YFB4500303","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5881942141","display_name":null,"funder_award_id":"202103","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G735754199","display_name":null,"funder_award_id":"6182520","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7588023154","display_name":null,"funder_award_id":"62072198","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7761692050","display_name":null,"funder_award_id":"2021035","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4382724591.pdf","grobid_xml":"https://content.openalex.org/works/W4382724591.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1521996498","https://openalex.org/W1553098517","https://openalex.org/W1614703486","https://openalex.org/W1639305476","https://openalex.org/W1969335064","https://openalex.org/W1976024527","https://openalex.org/W2056980397","https://openalex.org/W2110322986","https://openalex.org/W2475932436","https://openalex.org/W2481696877","https://openalex.org/W2916086000","https://openalex.org/W2935106878","https://openalex.org/W2979451760","https://openalex.org/W2986445348","https://openalex.org/W3006159724","https://openalex.org/W3006437988","https://openalex.org/W3014193728","https://openalex.org/W3195673285","https://openalex.org/W3209955552","https://openalex.org/W4288070990","https://openalex.org/W6628275343","https://openalex.org/W6705331335","https://openalex.org/W6760689153","https://openalex.org/W6793350840"],"related_works":["https://openalex.org/W3144870715","https://openalex.org/W3142319788","https://openalex.org/W4387951120","https://openalex.org/W3044766408","https://openalex.org/W2254835506","https://openalex.org/W4312753418","https://openalex.org/W1521996498","https://openalex.org/W3158009944","https://openalex.org/W4200376613","https://openalex.org/W2356209611"],"abstract_inverted_index":{"In":[0,66],"data":[1,33,63,118],"deduplication":[2,11,64,156,183],"systems,":[3],"chunking":[4,167],"has":[5],"a":[6,23,46,90,103,112],"significant":[7],"impact":[8,132],"on":[9,139,171],"the":[10,31,42,60,75,83,106,116,121,125,131,134,137,140,147,155,164,175],"ratio":[12],"and":[13,36,73,105,136,149],"throughput.":[14],"Existing":[15],"<italic":[16,91],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[17,92],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Content-Defined":[18],"Chunking</i>":[19],"(CDC)":[20],"approaches":[21,52,178],"exploit":[22],"sliding":[24,107],"window":[25,108],"to":[26,101],"calculate":[27],"rolling":[28,43,76,122],"hashes":[29,77,123],"of":[30,62,86,115,133,151,166],"input":[32,117],"stream":[34,119],"byte-by-byte,":[35],"then":[37],"determine":[38],"chunk":[39,141],"cut-points":[40],"if":[41,120],"hash":[44],"satisfies":[45],"given":[47],"cut-condition.":[48],"Since":[49],"previous":[50],"CDC":[51,177],"are":[53],"extremely":[54],"costly,":[55],"it":[56],"often":[57],"significantly":[58],"degrades":[59],"throughput":[61,165],"systems.":[65],"this":[67],"paper,":[68],"we":[69,88,128],"argue":[70],"that":[71,161],"calculating":[72],"checking":[74],"byte-by-byte":[78],"is":[79,100],"unnecessary.":[80],"To":[81],"reduce":[82],"CPU":[84],"overhead":[85],"CDC,":[87],"propose":[89],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">jump-based":[93],"chunking</i>":[94],"(JC)":[95],"approach.":[96],"The":[97],"key":[98],"idea":[99],"introduce":[102],"jump-condition,":[104],"can":[109],"jump":[110],"over":[111],"specific":[113],"length":[114],"satisfy":[124],"jump-condition.":[126],"Moreover,":[127],"also":[129],"explore":[130],"cut-condition":[135],"jump-condition":[138],"size.":[142],"Our":[143],"theoretic":[144],"studies":[145],"demonstrate":[146],"effectiveness":[148],"efficiency":[150],"JC,":[152],"without":[153],"compromising":[154],"ratio.":[157,184],"Experimental":[158],"results":[159],"show":[160],"JC":[162],"improves":[163],"by":[168],"about":[169],"2\u00d7":[170],"average":[172],"compared":[173],"with":[174],"state-of-the-art":[176],"while":[179],"still":[180],"guaranteeing":[181],"high":[182]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
