{"id":"https://openalex.org/W3001182873","doi":"https://doi.org/10.1109/cisp-bmei48845.2019.8965818","title":"Optimization of De-duplication Technology Based on CDC Blocking Algorithm","display_name":"Optimization of De-duplication Technology Based on CDC Blocking Algorithm","publication_year":2019,"publication_date":"2019-10-01","ids":{"openalex":"https://openalex.org/W3001182873","doi":"https://doi.org/10.1109/cisp-bmei48845.2019.8965818","mag":"3001182873"},"language":"en","primary_location":{"id":"doi:10.1109/cisp-bmei48845.2019.8965818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cisp-bmei48845.2019.8965818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 12th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112500856","display_name":"Junyang Nie","orcid":null},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junyang Nie","raw_affiliation_strings":["Guangxi university, School of Computer, Electronics and Information, Nanning, China"],"affiliations":[{"raw_affiliation_string":"Guangxi university, School of Computer, Electronics and Information, Nanning, China","institution_ids":["https://openalex.org/I150807315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101906293","display_name":"Lijuan Wu","orcid":"https://orcid.org/0000-0002-9049-4686"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lijuan Wu","raw_affiliation_strings":["Guangxi university, Information network center, Nanning, China"],"affiliations":[{"raw_affiliation_string":"Guangxi university, Information network center, Nanning, China","institution_ids":["https://openalex.org/I150807315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027051176","display_name":"Jingzhang Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingzhang Liang","raw_affiliation_strings":["Guangxi university, School of Electrical Engineering, Nanning, China"],"affiliations":[{"raw_affiliation_string":"Guangxi university, School of Electrical Engineering, Nanning, China","institution_ids":["https://openalex.org/I150807315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5112500856"],"corresponding_institution_ids":["https://openalex.org/I150807315"],"apc_list":null,"apc_paid":null,"fwci":0.6783,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.8,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.7463172078132629},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.743018388748169},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5033456683158875},{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.49437952041625977},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4634179174900055},{"id":"https://openalex.org/keywords/hotspot","display_name":"Hotspot (geology)","score":0.4437190592288971},{"id":"https://openalex.org/keywords/data-processing","display_name":"Data processing","score":0.43776506185531616},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.4165581464767456},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2066853940486908},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1296486258506775},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.09402546286582947}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.7463172078132629},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.743018388748169},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5033456683158875},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.49437952041625977},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4634179174900055},{"id":"https://openalex.org/C146481406","wikidata":"https://www.wikidata.org/wiki/Q105131","display_name":"Hotspot (geology)","level":2,"score":0.4437190592288971},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.43776506185531616},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.4165581464767456},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2066853940486908},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1296486258506775},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.09402546286582947},{"id":"https://openalex.org/C8058405","wikidata":"https://www.wikidata.org/wiki/Q46255","display_name":"Geophysics","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cisp-bmei48845.2019.8965818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cisp-bmei48845.2019.8965818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 12th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1835454951","https://openalex.org/W2238711864","https://openalex.org/W2270436279","https://openalex.org/W2346097572","https://openalex.org/W2475932436","https://openalex.org/W2554914960","https://openalex.org/W2606291150","https://openalex.org/W4293582904","https://openalex.org/W6638668855"],"related_works":["https://openalex.org/W2766145069","https://openalex.org/W3003602898","https://openalex.org/W4312554896","https://openalex.org/W2515214618","https://openalex.org/W2787300667","https://openalex.org/W2547510008","https://openalex.org/W4296041332","https://openalex.org/W2313358680","https://openalex.org/W2736204053","https://openalex.org/W2805733941"],"abstract_inverted_index":{"All":[0],"kinds":[1],"of":[2,13,25,32,56,83],"information":[3],"data":[4,15,26,39,61,73,78,95,99,107],"show":[5],"exponential":[6],"growth,":[7],"and":[8,28,81,94,98],"there":[9],"are":[10,102],"a":[11,42],"lot":[12],"duplicate":[14,38],"in":[16,45,76,104],"the":[17,23,46,57],"mass":[18],"data,":[19],"which":[20],"seriously":[21],"affects":[22],"efficiency":[24,80,97],"processing":[27,79,96],"causes":[29],"serious":[30],"waste":[31,82],"storage":[33,48,84],"space.":[34,85],"How":[35],"to":[36],"delete":[37],"has":[40],"become":[41],"research":[43],"hotspot":[44],"current":[47],"field.":[49],"The":[50],"content-defined":[51],"chunking(CDC)":[52],"algorithm":[53,91],"is":[54,92],"one":[55],"most":[58],"widely":[59],"used":[60],"de-duplication":[62,100],"algorithm,":[63],"but":[64],"it":[65],"often":[66],"produces":[67],"too":[68,71],"small":[69],"or":[70],"large":[72],"blocks,":[74],"resulting":[75],"low":[77],"In":[86],"this":[87],"paper,":[88],"CDC":[89],"chunking":[90],"optimized,":[93],"rate":[101],"improved":[103],"three":[105],"experimental":[106],"sets":[108],"through":[109],"experiments.":[110]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
