{"id":"https://openalex.org/W2559785442","doi":"https://doi.org/10.1109/tkde.2016.2637928","title":"A Novel Cost-Based Model for Data Repairing","display_name":"A Novel Cost-Based Model for Data Repairing","publication_year":2016,"publication_date":"2016-12-09","ids":{"openalex":"https://openalex.org/W2559785442","doi":"https://doi.org/10.1109/tkde.2016.2637928","mag":"2559785442"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2016.2637928","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2016.2637928","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027549447","display_name":"Shuang Hao","orcid":"https://orcid.org/0000-0003-2756-6015"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shuang Hao","raw_affiliation_strings":["Department of Computer Science, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101824160","display_name":"Nan Tang","orcid":"https://orcid.org/0000-0003-2832-0295"},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Nan Tang","raw_affiliation_strings":["Qatar Computing Research Institute, Hamad Bin Khalifa Univeristy, Doha, Qatar"],"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, Hamad Bin Khalifa Univeristy, Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100451576","display_name":"Guoliang Li","orcid":"https://orcid.org/0000-0002-1398-0621"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoliang Li","raw_affiliation_strings":["Department of Computer Science, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101752229","display_name":"Jian He","orcid":"https://orcid.org/0000-0003-4892-7852"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian He","raw_affiliation_strings":["Department of Computer Science, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101463641","display_name":"Na Ta","orcid":"https://orcid.org/0000-0002-6586-8332"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Na Ta","raw_affiliation_strings":["Department of Computer Science, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100951661","display_name":"Jianhua Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianhua Feng","raw_affiliation_strings":["Department of Computer Science, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5027549447"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":4.3086,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.94458474,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"29","issue":"4","first_page":"727","last_page":"742"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9751999974250793,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7489054203033447},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.7209344506263733},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.6421588063240051},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.6360286474227905},{"id":"https://openalex.org/keywords/data-integrity","display_name":"Data integrity","score":0.6096946001052856},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5932096242904663},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5307011008262634},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.4956098794937134},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4919099807739258},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.46548452973365784},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4325253963470459},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4113420248031616},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.372768759727478},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2712036371231079},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25120776891708374},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.217548668384552},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13394102454185486}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7489054203033447},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.7209344506263733},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.6421588063240051},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.6360286474227905},{"id":"https://openalex.org/C33762810","wikidata":"https://www.wikidata.org/wiki/Q461671","display_name":"Data integrity","level":2,"score":0.6096946001052856},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5932096242904663},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5307011008262634},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.4956098794937134},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4919099807739258},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.46548452973365784},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4325253963470459},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4113420248031616},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.372768759727478},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2712036371231079},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25120776891708374},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.217548668384552},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13394102454185486},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2016.2637928","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2016.2637928","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5299999713897705}],"awards":[{"id":"https://openalex.org/G5452479108","display_name":null,"funder_award_id":"61373024","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5652550493","display_name":null,"funder_award_id":"61632016","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6973711173","display_name":null,"funder_award_id":"61521002","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8304784474","display_name":null,"funder_award_id":"FDCT/116/2013/A3","funder_id":"https://openalex.org/F4320322624","funder_display_name":"Tsinghua National Laboratory for Information Science and Technology"},{"id":"https://openalex.org/G8827671557","display_name":null,"funder_award_id":"MYRG105 (Y1-L3)-FST13-GZ","funder_id":"https://openalex.org/F4320322624","funder_display_name":"Tsinghua National Laboratory for Information Science and Technology"},{"id":"https://openalex.org/G978999948","display_name":null,"funder_award_id":"61422205","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320316083","display_name":"Tencent","ror":"https://ror.org/00hhjss72"},{"id":"https://openalex.org/F4320322624","display_name":"Tsinghua National Laboratory for Information Science and Technology","ror":"https://ror.org/03cve4549"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W4175336","https://openalex.org/W1626378195","https://openalex.org/W1980706971","https://openalex.org/W2013627627","https://openalex.org/W2044022210","https://openalex.org/W2044469685","https://openalex.org/W2046298800","https://openalex.org/W2047745978","https://openalex.org/W2059009730","https://openalex.org/W2063103859","https://openalex.org/W2078132546","https://openalex.org/W2081186682","https://openalex.org/W2089206172","https://openalex.org/W2089417393","https://openalex.org/W2106895292","https://openalex.org/W2108132403","https://openalex.org/W2112840274","https://openalex.org/W2113415503","https://openalex.org/W2114764731","https://openalex.org/W2119803607","https://openalex.org/W2131060875","https://openalex.org/W2137479650","https://openalex.org/W2137775416","https://openalex.org/W2142472956","https://openalex.org/W2147805208","https://openalex.org/W2161163216","https://openalex.org/W2162449239","https://openalex.org/W2163600218","https://openalex.org/W2164187405","https://openalex.org/W2165528679","https://openalex.org/W2167333415","https://openalex.org/W2171332293","https://openalex.org/W2178708778","https://openalex.org/W2182787248","https://openalex.org/W2262592273","https://openalex.org/W2286724461","https://openalex.org/W2329105431","https://openalex.org/W2513214219","https://openalex.org/W3000214033","https://openalex.org/W6600161640","https://openalex.org/W6645410415","https://openalex.org/W6676014748","https://openalex.org/W6677534882","https://openalex.org/W6679898556","https://openalex.org/W6680599078","https://openalex.org/W6686029465"],"related_works":["https://openalex.org/W2280422768","https://openalex.org/W3143197806","https://openalex.org/W4252555497","https://openalex.org/W3121175838","https://openalex.org/W3016293053","https://openalex.org/W2401723157","https://openalex.org/W2952904874","https://openalex.org/W4389302559","https://openalex.org/W1690653314","https://openalex.org/W2138007485"],"abstract_inverted_index":{"Integrity":[0],"constraint":[1],"based":[2],"data":[3,82,126,160],"repairing":[4,161],"is":[5,173],"an":[6],"iterative":[7],"process":[8,44],"consisting":[9],"of":[10,45,53,65,79,87,125,157],"two":[11],"parts:":[12],"detect":[13],"and":[14,23,47,81,146,155,193,214],"group":[15,28],"errors":[16,49],"that":[17,30,101,135,165,202],"violate":[18],"given":[19],"integrity":[20],"constraints":[21],"(ICs);":[22],"modify":[24],"values":[25,67],"inside":[26],"each":[27,69],"such":[29],"the":[31,43,111,123,136,158,170,198],"modified":[32],"database":[33],"satisfies":[34],"those":[35],"ICs.":[36,88],"However,":[37],"most":[38],"existing":[39,207],"automatic":[40,208],"solutions":[41],"treat":[42],"detecting":[46,145],"grouping":[48,147],"straightforwardly":[50],"(e.g.,":[51],"violations":[52,80],"functional":[54],"dependencies":[55],"using":[56,106],"string":[57,94,107],"equality),":[58],"while":[59],"putting":[60],"more":[61],"attention":[62],"on":[63,93],"heuristics":[64],"modifying":[66],"within":[68],"group.":[70],"In":[71,188],"this":[72],"paper,":[73],"we":[74,114,190],"propose":[75,116],"a":[76,85,117,140,177],"revised":[77,90,112,137],"semantics":[78,91,138],"consistency":[83],"w.r.t.":[84],"set":[86],"The":[89],"relies":[92],"similarities,":[95],"in":[96,150,169,211],"contrast":[97],"to":[98,121,184,196],"traditional":[99],"methods":[100],"use":[102],"syntactic":[103],"error":[104],"detection":[105],"equality.":[108],"Along":[109],"with":[110],"semantics,":[113],"also":[115],"new":[118,171],"cost":[119,124],"model":[120,172],"quantify":[122],"repair":[127,209],"by":[128],"considering":[129],"distances":[130],"between":[131],"strings.":[132],"We":[133,163,180],"show":[134,201],"provides":[139],"significant":[141],"change":[142],"for":[143,176],"better":[144],"errors,":[148],"which":[149],"turn":[151],"improves":[152],"both":[153,212],"precision":[154,213],"recall":[156],"following":[159],"step.":[162],"prove":[164],"finding":[166],"minimum-cost":[167],"repairs":[168],"NP-hard,":[174],"even":[175],"single":[178],"FD.":[179],"devise":[181],"efficient":[182],"algorithms":[183,210],"find":[185],"approximate":[186],"repairs.":[187],"addition,":[189],"develop":[191],"indices":[192],"optimization":[194],"techniques":[195],"improve":[197],"efficiency.":[199],"Experiments":[200],"our":[203],"approach":[204],"significantly":[205],"outperforms":[206],"recall.":[215]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
