{"id":"https://openalex.org/W2089206172","doi":"https://doi.org/10.1145/2463676.2463706","title":"Don't be SCAREd","display_name":"Don't be SCAREd","publication_year":2013,"publication_date":"2013-06-22","ids":{"openalex":"https://openalex.org/W2089206172","doi":"https://doi.org/10.1145/2463676.2463706","mag":"2089206172"},"language":"en","primary_location":{"id":"doi:10.1145/2463676.2463706","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2463676.2463706","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103524335","display_name":"Mohamed Yakout","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mohamed Yakout","raw_affiliation_strings":["Microsoft Corp., Bellevue, WA, USA","Microsoft Corporation, Bellevue, WA, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp., Bellevue, WA, USA","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210108985"]},{"raw_affiliation_string":"Microsoft Corporation, Bellevue, WA, USA#TAB#","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091872345","display_name":"Laure Berti\u2010\u00c9quille","orcid":"https://orcid.org/0000-0002-8046-0570"},"institutions":[{"id":"https://openalex.org/I1306264927","display_name":"Institut de Recherche pour le D\u00e9veloppement","ror":"https://ror.org/032qezt74","country_code":"BJ","type":"government","lineage":["https://openalex.org/I1306264927","https://openalex.org/I2802818602","https://openalex.org/I4210090127","https://openalex.org/I4210131494","https://openalex.org/I4210166444"]}],"countries":["BJ"],"is_corresponding":false,"raw_author_name":"Laure Berti-\u00c9quille","raw_affiliation_strings":["Institut de Recherche pour le D\u00e9veloppement, Aix-en-Provence, France"],"affiliations":[{"raw_affiliation_string":"Institut de Recherche pour le D\u00e9veloppement, Aix-en-Provence, France","institution_ids":["https://openalex.org/I1306264927"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089912733","display_name":"Ahmed K. Elmagarmid","orcid":"https://orcid.org/0000-0002-0044-458X"},"institutions":[{"id":"https://openalex.org/I1301390666","display_name":"Qatar Airways (Qatar)","ror":"https://ror.org/01hx00y13","country_code":"QA","type":"company","lineage":["https://openalex.org/I1301390666"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Ahmed K. Elmagarmid","raw_affiliation_strings":["Qatar Computing Research Institute, Doha, Qatar","Qatar Computing Research Institute [Doha, Qatar]"],"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, Doha, Qatar","institution_ids":["https://openalex.org/I1301390666"]},{"raw_affiliation_string":"Qatar Computing Research Institute [Doha, Qatar]","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5103524335"],"corresponding_institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210108985"],"apc_list":null,"apc_paid":null,"fwci":14.976,"has_fulltext":false,"cited_by_count":145,"citation_normalized_percentile":{"value":0.99139682,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"553","last_page":"564"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9596999883651733,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8318337202072144},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.8229801654815674},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5947204828262329},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5917041301727295},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5617804527282715},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.5610731840133667},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47500526905059814},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.44961801171302795},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4149840474128723},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.41491737961769104},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.19864147901535034}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8318337202072144},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.8229801654815674},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5947204828262329},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5917041301727295},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5617804527282715},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.5610731840133667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47500526905059814},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.44961801171302795},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4149840474128723},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.41491737961769104},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.19864147901535034},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2463676.2463706","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2463676.2463706","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data","raw_type":"proceedings-article"},{"id":"pmh:oai:docs.lib.purdue.edu:ccpubs-1539","is_oa":false,"landing_page_url":"https://docs.lib.purdue.edu/ccpubs/530","pdf_url":null,"source":{"id":"https://openalex.org/S4377196310","display_name":"Purdue e-Pubs (Purdue University System)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801333002","host_organization_name":"Purdue University System","host_organization_lineage":["https://openalex.org/I2801333002"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Cyber Center Publications","raw_type":"text"},{"id":"pmh:oai:HAL:hal-01855779v1","is_oa":false,"landing_page_url":"https://inria.hal.science/hal-01855779","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data (SIGMOD'13), Jun 2013, New York, United States. pp.553-564 &#x27E8;10.1145/2463676.2463706&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.4300000071525574,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W77952428","https://openalex.org/W146125889","https://openalex.org/W235821689","https://openalex.org/W308556676","https://openalex.org/W1534477342","https://openalex.org/W1572401739","https://openalex.org/W1853963442","https://openalex.org/W1860991815","https://openalex.org/W2010787744","https://openalex.org/W2027048490","https://openalex.org/W2032279394","https://openalex.org/W2034841618","https://openalex.org/W2047745978","https://openalex.org/W2059009730","https://openalex.org/W2063103859","https://openalex.org/W2065000833","https://openalex.org/W2077783414","https://openalex.org/W2108991785","https://openalex.org/W2136709660","https://openalex.org/W2137775416","https://openalex.org/W2147805208","https://openalex.org/W2150884987","https://openalex.org/W2154462399","https://openalex.org/W2161163216","https://openalex.org/W2164187405","https://openalex.org/W4285719527","https://openalex.org/W6680599078","https://openalex.org/W6683181193"],"related_works":["https://openalex.org/W4255837520","https://openalex.org/W2389214306","https://openalex.org/W4235240664","https://openalex.org/W2965083567","https://openalex.org/W2387011115","https://openalex.org/W1838576100","https://openalex.org/W2095886385","https://openalex.org/W2889616422","https://openalex.org/W2089704382","https://openalex.org/W1983399550"],"abstract_inverted_index":{"Various":[0],"computational":[1],"procedures":[2],"or":[3],"constraint-based":[4],"methods":[5,89],"for":[6,90,140,167],"data":[7,53,65,68,142,160,177,213],"repairing":[8,54,104],"have":[9,27],"been":[10],"proposed":[11],"over":[12],"the":[13,31,36,44,61,67,103,108,112,118,153,186,198],"last":[14],"decades":[15],"to":[16,38,117,151,159,184,211],"identify":[17],"errors":[18],"and,":[19],"when":[20],"possible,":[21],"correct":[22],"them.":[23],"However,":[24],"these":[25],"approaches":[26],"several":[28,162],"limitations":[29],"including":[30],"scalability":[32,202],"and":[33,87,111,144,189,201],"quality":[34,100],"of":[35,43,63,102,114,147,155,203],"values":[37],"be":[39,72,165],"used":[40],"in":[41,209],"replacement":[42,64],"errors.":[45],"In":[46],"this":[47],"paper,":[48],"we":[49,180,195],"propose":[50,121,181],"a":[51,81,99,126,137,145,168,182],"new":[52],"approach":[55,83,205],"that":[56,130],"is":[57,80],"based":[58,106,171],"on":[59,107,136,172,175,206],"maximizing":[60],"likelihood":[62,88,109],"given":[66],"distribution,":[69],"which":[70],"can":[71,164],"modeled":[73],"using":[74],"statistical":[75],"machine":[76,85,148],"learning":[77,86,149],"techniques.":[78],"This":[79],"novel":[82],"combining":[84],"cleaning":[91,214],"dirty":[92],"databases":[93],"by":[94],"value":[95],"modification.":[96],"We":[97,120],"develop":[98],"measure":[101],"updates":[105,163],"benefit":[110],"amount":[113],"changes":[115],"applied":[116],"database.":[119],"SCARE":[122,134],"(SCalable":[123],"Automatic":[124],"REpairing),":[125],"systematic":[127],"scalable":[128],"framework":[129],"follows":[131],"our":[132,204],"approach.":[133],"relies":[135],"robust":[138],"mechanism":[139,183],"horizontal":[141],"partitioning":[143],"combination":[146],"techniques":[150],"predict":[152],"set":[154],"possible":[156],"updates.":[157],"Due":[158],"partitioning,":[161],"predicted":[166],"single":[169],"record":[170],"local":[173,187],"views":[174],"each":[176],"partition.":[178],"Therefore,":[179],"combine":[185],"predictions":[188],"obtain":[190],"accurate":[191],"final":[192],"predictions.":[193],"Finally,":[194],"experimentally":[196],"demonstrate":[197],"effectiveness,":[199],"efficiency,":[200],"real-world":[207],"datasets":[208],"comparison":[210],"recent":[212],"approaches.":[215]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":21},{"year":2020,"cited_by_count":23},{"year":2019,"cited_by_count":15},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":14},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":9}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2016-06-24T00:00:00"}
