{"id":"https://openalex.org/W3045786285","doi":"https://doi.org/10.1109/tkde.2020.3012472","title":"A Hybrid Data Cleaning Framework Using Markov Logic Networks","display_name":"A Hybrid Data Cleaning Framework Using Markov Logic Networks","publication_year":2020,"publication_date":"2020-07-28","ids":{"openalex":"https://openalex.org/W3045786285","doi":"https://doi.org/10.1109/tkde.2020.3012472","mag":"3045786285"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2020.3012472","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2020.3012472","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088891882","display_name":"Congcong Ge","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Congcong Ge","raw_affiliation_strings":["College of Computer Science, Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006238145","display_name":"Yunjun Gao","orcid":"https://orcid.org/0000-0003-3816-8450"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunjun Gao","raw_affiliation_strings":["College of Computer Science, Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014150958","display_name":"Xiaoye Miao","orcid":"https://orcid.org/0000-0002-8632-1539"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoye Miao","raw_affiliation_strings":["Center for Data Science, Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Center for Data Science, Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100757927","display_name":"Bin Yao","orcid":"https://orcid.org/0000-0002-6478-4209"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Yao","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049707744","display_name":"Haobo Wang","orcid":"https://orcid.org/0000-0001-8586-3048"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haobo Wang","raw_affiliation_strings":["College of Computer Science, Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5088891882"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":2.9607,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.91425367,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"34","issue":"5","first_page":"2048","last_page":"2062"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.9628000259399414,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/notation","display_name":"Notation","score":0.8055874109268188},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5390156507492065},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4302389919757843},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.42705416679382324},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3721303343772888},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.31821343302726746},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2783176898956299},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.2195521593093872},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10112947225570679}],"concepts":[{"id":"https://openalex.org/C45357846","wikidata":"https://www.wikidata.org/wiki/Q2001982","display_name":"Notation","level":2,"score":0.8055874109268188},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5390156507492065},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4302389919757843},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.42705416679382324},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3721303343772888},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.31821343302726746},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2783176898956299},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.2195521593093872},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10112947225570679}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2020.3012472","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2020.3012472","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.46000000834465027,"id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G2462854349","display_name":null,"funder_award_id":"U1609217","funder_id":"https://openalex.org/F4320334064","funder_display_name":"National Natural Science Foundation of China-Zhejiang Joint Fund for the Integration of Industrialization and Informatization"},{"id":"https://openalex.org/G2937796669","display_name":null,"funder_award_id":"2018YFB1004003","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5916212369","display_name":null,"funder_award_id":"61972338","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G628069048","display_name":null,"funder_award_id":"61902343","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334064","display_name":"National Natural Science Foundation of China-Zhejiang Joint Fund for the Integration of Industrialization and Informatization","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1626378195","https://openalex.org/W1964786778","https://openalex.org/W1976732638","https://openalex.org/W1992479406","https://openalex.org/W2044469685","https://openalex.org/W2046298800","https://openalex.org/W2056748234","https://openalex.org/W2081186682","https://openalex.org/W2089206172","https://openalex.org/W2112840274","https://openalex.org/W2153531471","https://openalex.org/W2164187405","https://openalex.org/W2167333415","https://openalex.org/W2170712852","https://openalex.org/W2171332293","https://openalex.org/W2295468252","https://openalex.org/W2437617937","https://openalex.org/W2548122763","https://openalex.org/W2591700809","https://openalex.org/W2804988694","https://openalex.org/W2808345493","https://openalex.org/W2929941791","https://openalex.org/W3000214033","https://openalex.org/W3023049136","https://openalex.org/W3105977086","https://openalex.org/W4300246663","https://openalex.org/W6680599078"],"related_works":["https://openalex.org/W2338700700","https://openalex.org/W2386767533","https://openalex.org/W2012842278","https://openalex.org/W2090686886","https://openalex.org/W2164523229","https://openalex.org/W4300046752","https://openalex.org/W4288772858","https://openalex.org/W4225426323","https://openalex.org/W2131677940","https://openalex.org/W1877300132"],"abstract_inverted_index":{"With":[0],"the":[1,19,62,81,113,117,143,168],"increase":[2],"of":[3,12,18,42,56,70,91,129,154,170],"dirty":[4],"data,":[5],"data":[6,13,37,77,109,119,137,148],"cleaning":[7,23,38,114,120],"turns":[8],"into":[9],"a":[10,34,89,101,127,151],"crux":[11],"analysis.":[14],"The":[15],"accuracy":[16],"limitation":[17],"existing":[20],"integrity":[21,64],"constraints-based":[22],"approaches":[24],"results":[25,160],"from":[26],"insufficient":[27,63],"rules.":[28],"In":[29,80,116],"this":[30],"paper,":[31],"we":[32],"present":[33],"novel":[35,152],"hybrid":[36],"framework":[39],"on":[40,161],"top":[41],"Markov":[43],"logic":[44],"networks":[45],"(MLNs),":[46],"termed":[47],"as":[48],"<inline-formula><tex-math":[49,66,84,122,171],"notation=\"LaTeX\">${\\sf":[50,67,85,123,172],"MLNClean}$</tex-math></inline-formula>":[51,68,86,124,173],",":[52],"which":[53],"is":[54],"capable":[55],"learning":[57],"instantiated":[58,93],"rules":[59,94],"to":[60,96,106,132],"supplement":[61],"constraints.":[65],"consists":[69],"two":[71],"steps,":[72],"i.e.,":[73],"<i>pre-processing</i>":[74],"and":[75,98,111,140,164],"<i>two-stage":[76],"cleaning</i>":[78],".":[79,157],"pre-processing":[82],"step,":[83,121],"first":[87,125],"infers":[88],"set":[90],"probable":[92],"according":[95],"MLNs":[97],"then":[99],"builds":[100],"two-layer":[102],"MLN":[103],"index":[104],"structure":[105],"generate":[107],"multiple":[108],"versions":[110],"facilitate":[112],"process.":[115],"two-stage":[118],"presents":[126],"concept":[128,153],"<i>reliability":[130],"score</i>":[131,156],"clean":[133],"errors":[134],"within":[135],"each":[136],"version":[138,149],"separately,":[139],"afterward":[141],"eliminates":[142],"conflict":[144],"values":[145],"among":[146],"different":[147],"using":[150],"<i>fusion":[155],"Considerable":[158],"experimental":[159],"both":[162],"real":[163],"synthetic":[165],"scenarios":[166],"demonstrate":[167],"effectiveness":[169],"in":[174],"practice.":[175]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":3}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
