{"id":"https://openalex.org/W2936520540","doi":"https://doi.org/10.3390/sym11040575","title":"When Considering More Elements: Attribute Correlation in Unsupervised Data Cleaning under Blocking","display_name":"When Considering More Elements: Attribute Correlation in Unsupervised Data Cleaning under Blocking","publication_year":2019,"publication_date":"2019-04-19","ids":{"openalex":"https://openalex.org/W2936520540","doi":"https://doi.org/10.3390/sym11040575","mag":"2936520540"},"language":"en","primary_location":{"id":"doi:10.3390/sym11040575","is_oa":true,"landing_page_url":"https://doi.org/10.3390/sym11040575","pdf_url":"https://www.mdpi.com/2073-8994/11/4/575/pdf?version=1556184461","source":{"id":"https://openalex.org/S190787756","display_name":"Symmetry","issn_l":"2073-8994","issn":["2073-8994"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Symmetry","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2073-8994/11/4/575/pdf?version=1556184461","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100439832","display_name":"Pei Li","orcid":"https://orcid.org/0000-0001-8699-8825"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pei Li","raw_affiliation_strings":["Science and Technology on Information Systems Engineering Laboratory, National University of Defense Technology, Changsha 410073, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Science and Technology on Information Systems Engineering Laboratory, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102326790","display_name":"Chaofan Dai","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chaofan Dai","raw_affiliation_strings":["Science and Technology on Information Systems Engineering Laboratory, National University of Defense Technology, Changsha 410073, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Science and Technology on Information Systems Engineering Laboratory, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100631553","display_name":"Wenqian Wang","orcid":"https://orcid.org/0000-0001-8382-7545"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenqian Wang","raw_affiliation_strings":["Science and Technology on Information Systems Engineering Laboratory, National University of Defense Technology, Changsha 410073, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Science and Technology on Information Systems Engineering Laboratory, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102326790"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":{"value":2000,"currency":"CHF","value_usd":2165},"apc_paid":{"value":2000,"currency":"CHF","value_usd":2165},"fwci":0.2058,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.56763067,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"11","issue":"4","first_page":"575","last_page":"575"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.8307840824127197},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8000779747962952},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6042404174804688},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.601502537727356},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.5369235277175903},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5311447978019714},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.5090274810791016},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.4979252815246582},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44432011246681213},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4361970126628876},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.42595916986465454},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.36830538511276245},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3422206938266754},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1010890007019043}],"concepts":[{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.8307840824127197},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8000779747962952},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6042404174804688},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.601502537727356},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.5369235277175903},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5311447978019714},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.5090274810791016},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.4979252815246582},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44432011246681213},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4361970126628876},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.42595916986465454},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.36830538511276245},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3422206938266754},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1010890007019043},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/sym11040575","is_oa":true,"landing_page_url":"https://doi.org/10.3390/sym11040575","pdf_url":"https://www.mdpi.com/2073-8994/11/4/575/pdf?version=1556184461","source":{"id":"https://openalex.org/S190787756","display_name":"Symmetry","issn_l":"2073-8994","issn":["2073-8994"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Symmetry","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:a0133fa98c17426a96065c445371399e","is_oa":true,"landing_page_url":"https://doaj.org/article/a0133fa98c17426a96065c445371399e","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Symmetry, Vol 11, Iss 4, p 575 (2019)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2073-8994/11/4/575/","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3390/sym11040575","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Symmetry","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/sym11040575","is_oa":true,"landing_page_url":"https://doi.org/10.3390/sym11040575","pdf_url":"https://www.mdpi.com/2073-8994/11/4/575/pdf?version=1556184461","source":{"id":"https://openalex.org/S190787756","display_name":"Symmetry","issn_l":"2073-8994","issn":["2073-8994"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Symmetry","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5099999904632568,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1976437052","https://openalex.org/W1981877370","https://openalex.org/W1992642694","https://openalex.org/W2048053751","https://openalex.org/W2052642427","https://openalex.org/W2067102178","https://openalex.org/W2133299088","https://openalex.org/W2139490252","https://openalex.org/W2150669688","https://openalex.org/W2247690851","https://openalex.org/W2263060133","https://openalex.org/W2270503162","https://openalex.org/W2368635037","https://openalex.org/W2410531948","https://openalex.org/W2480641892","https://openalex.org/W2524615958","https://openalex.org/W2579124921","https://openalex.org/W2592659532","https://openalex.org/W2616146705","https://openalex.org/W2751863548","https://openalex.org/W2752419508","https://openalex.org/W2767280887","https://openalex.org/W2782835884","https://openalex.org/W2783367877","https://openalex.org/W2819873736","https://openalex.org/W2885895599","https://openalex.org/W2894438030","https://openalex.org/W3008653445","https://openalex.org/W7007516683","https://openalex.org/W7056594427"],"related_works":["https://openalex.org/W4245395944","https://openalex.org/W2143551613","https://openalex.org/W2138823233","https://openalex.org/W1979740464","https://openalex.org/W2143345456","https://openalex.org/W1789991335","https://openalex.org/W2392835431","https://openalex.org/W2562731034","https://openalex.org/W4315705795","https://openalex.org/W1870651561"],"abstract_inverted_index":{"In":[0,71,192],"banks,":[1],"governments,":[2],"and":[3,17,26,80,93,122,134,204,234],"internet":[4,229],"companies,":[5],"due":[6],"to":[7,64,84,129,176],"the":[8,21,41,90,131,136,148,154,159,166,180,189,194,200,208],"increasing":[9],"demand":[10],"for":[11,23,100],"data":[12,24,34,44,76,112,126,142,185],"in":[13,37,89,103,110,215,221],"various":[14],"information":[15,66,222,236],"systems":[16,67,223],"continuously":[18],"shortening":[19],"of":[20,33,43,138,210],"cycle":[22],"collection":[25],"update,":[27],"there":[28,86],"may":[29],"be":[30,219],"a":[31,38,57,96,170,184],"variety":[32],"quality":[35],"issues":[36],"database.":[39],"As":[40],"expansion":[42],"scales,":[45],"methods":[46,128,150,168],"such":[47,227],"as":[48,228],"pre-specifying":[49],"business":[50],"rules":[51],"or":[52,213],"introducing":[53],"expert":[54],"experience":[55],"into":[56,78,183],"repair":[58,91,155,160],"process":[59],"are":[60],"no":[61],"longer":[62],"applicable":[63],"some":[65],"requiring":[68,224],"rapid":[69,225],"responses.":[70],"this":[72,104],"case,":[73],"we":[74,114,163],"divided":[75],"cleaning":[77,102,190],"supervised":[79],"unsupervised":[81,101,111],"forms":[82],"according":[83],"whether":[85],"were":[87],"interventions":[88,214],"processes":[92],"put":[94,177],"forward":[95],"new":[97],"dimension":[98],"suitable":[99],"paper.":[105],"For":[106],"weak":[107],"logic":[108],"errors":[109],"cleaning,":[113],"proposed":[115],"an":[116],"attribute":[117],"correlation-based":[118],"(ACB)-Framework":[119],"under":[120],"blocking,":[121],"designed":[123],"three":[124],"different":[125],"blocking":[127,149,167,197],"reduce":[130,153,199],"time":[132,156,202],"complexity":[133],"test":[135],"impact":[137],"clustering":[139,173],"accuracy":[140,174],"on":[141],"cleaning.":[143],"The":[144],"experiments":[145],"showed":[146],"that":[147,165],"could":[151],"effectively":[152],"by":[157],"maintaining":[158],"validity.":[161],"Moreover,":[162],"concluded":[164],"with":[169,179,196],"too":[171],"high":[172],"tended":[175],"tuples":[178],"same":[181],"elements":[182],"block,":[186],"which":[187,217],"reduced":[188],"ability.":[191],"summary,":[193],"ACB-Framework":[195],"can":[198,218],"corresponding":[201],"cost":[203],"does":[205],"not":[206],"need":[207],"guidance":[209],"domain":[211],"knowledge":[212],"repair,":[216],"applied":[220],"responses,":[226],"web":[230],"pages,":[231],"network":[232],"servers,":[233],"sensor":[235],"acquisition.":[237]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
