{"id":"https://openalex.org/W2971148757","doi":"https://doi.org/10.1145/3328519.3329133","title":"Towards an End-to-End Human-Centric Data Cleaning Framework","display_name":"Towards an End-to-End Human-Centric Data Cleaning Framework","publication_year":2019,"publication_date":"2019-07-05","ids":{"openalex":"https://openalex.org/W2971148757","doi":"https://doi.org/10.1145/3328519.3329133","mag":"2971148757"},"language":"en","primary_location":{"id":"doi:10.1145/3328519.3329133","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3328519.3329133","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3328519.3329133","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Workshop on Human-In-the-Loop Data Analytics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3328519.3329133","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004937696","display_name":"El Kindi Rezig","orcid":"https://orcid.org/0000-0002-5187-3499"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"El Kindi Rezig","raw_affiliation_strings":["MIT CSAIL"],"affiliations":[{"raw_affiliation_string":"MIT CSAIL","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026923999","display_name":"Mourad Ouzzani","orcid":"https://orcid.org/0000-0002-4035-3025"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mourad Ouzzani","raw_affiliation_strings":["Qatar Computing Research Institute, HBKU"],"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, HBKU","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089912733","display_name":"Ahmed K. Elmagarmid","orcid":"https://orcid.org/0000-0002-0044-458X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmed K. Elmagarmid","raw_affiliation_strings":["Qatar Computing Research Institute, HBKU"],"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, HBKU","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000123743","display_name":"Walid G. Aref","orcid":"https://orcid.org/0000-0001-8169-7775"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Walid G. Aref","raw_affiliation_strings":["Purdue University"],"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074724644","display_name":"Michael Stonebraker","orcid":"https://orcid.org/0000-0001-9184-9058"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michael Stonebraker","raw_affiliation_strings":["MIT CSAIL"],"affiliations":[{"raw_affiliation_string":"MIT CSAIL","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5004937696"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8561,"has_fulltext":true,"cited_by_count":18,"citation_normalized_percentile":{"value":0.86655558,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9643999934196472,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7375709414482117},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.7016769647598267},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6622633934020996},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.5108925700187683},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5051434636116028},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.48905882239341736},{"id":"https://openalex.org/keywords/end-user","display_name":"End user","score":0.4437218904495239},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.35827016830444336},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29696518182754517},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.16226735711097717},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.0967155396938324}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7375709414482117},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.7016769647598267},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6622633934020996},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.5108925700187683},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5051434636116028},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.48905882239341736},{"id":"https://openalex.org/C91262260","wikidata":"https://www.wikidata.org/wiki/Q528074","display_name":"End user","level":2,"score":0.4437218904495239},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35827016830444336},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29696518182754517},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.16226735711097717},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0967155396938324},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3328519.3329133","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3328519.3329133","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3328519.3329133","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Workshop on Human-In-the-Loop Data Analytics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3328519.3329133","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3328519.3329133","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3328519.3329133","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Workshop on Human-In-the-Loop Data Analytics","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2971148757.pdf","grobid_xml":"https://content.openalex.org/works/W2971148757.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W1992479406","https://openalex.org/W2044469685","https://openalex.org/W2046298800","https://openalex.org/W2047182010","https://openalex.org/W2047745978","https://openalex.org/W2056748234","https://openalex.org/W2063103859","https://openalex.org/W2099637074","https://openalex.org/W2106675345","https://openalex.org/W2106895292","https://openalex.org/W2108991785","https://openalex.org/W2137775416","https://openalex.org/W2167333415","https://openalex.org/W2225677724","https://openalex.org/W2238711864","https://openalex.org/W2288244345","https://openalex.org/W2329105431","https://openalex.org/W2407536635","https://openalex.org/W2427822648","https://openalex.org/W2544486974","https://openalex.org/W2591700809","https://openalex.org/W2612526608","https://openalex.org/W2612989134","https://openalex.org/W2613390631","https://openalex.org/W2616382011","https://openalex.org/W2767280887","https://openalex.org/W2803396353","https://openalex.org/W2809037461","https://openalex.org/W3099883947","https://openalex.org/W3146259567","https://openalex.org/W4293582904"],"related_works":["https://openalex.org/W4299590256","https://openalex.org/W2951281592","https://openalex.org/W2166381389","https://openalex.org/W4284893874","https://openalex.org/W3163634122","https://openalex.org/W3119482857","https://openalex.org/W2919182614","https://openalex.org/W333503034","https://openalex.org/W2054736184","https://openalex.org/W3159728998"],"abstract_inverted_index":{"Data":[0],"Cleaning":[1],"refers":[2],"to":[3,73,101,108,113,127,136,142,144,192,195,214],"the":[4,12,65,74,103,106,117,138,166,171,209,219],"process":[5,23],"of":[6,21,36,44,51,58,170],"detecting":[7,48],"and":[8,54,186,204],"fixing":[9],"errors":[10,46],"in":[11,64,116,132,165,218],"data.":[13],"Human":[14],"involvement":[15],"is":[16,33,70,124,154],"instrumental":[17],"at":[18],"several":[19,84],"stages":[20],"this":[22,68,176,182,197,212],"such":[24],"as":[25],"providing":[26],"rules":[27],"or":[28,151],"validating":[29],"computed":[30],"repairs.":[31],"There":[32,153],"a":[34,41,62,79,201],"plethora":[35],"data":[37,45,81,85,146,158],"cleaning":[38,76,82,86,118,159,167,173,220],"algorithms":[39,60],"addressing":[40],"wide":[42],"range":[43],"(e.g.,":[47],"duplicates,":[49],"violations":[50],"integrity":[52],"constraints,":[53],"missing":[55],"values).":[56],"Many":[57],"these":[59,97],"involve":[61,114],"human":[63],"loop,":[66],"however,":[67],"latter":[69],"usually":[71],"coupled":[72],"underlying":[75,172],"algorithms.":[77,174],"In":[78,175],"real":[80],"pipeline,":[83],"operations":[87],"are":[88],"performed":[89],"using":[90],"different":[91],"tools.":[92],"A":[93],"high-level":[94],"reasoning":[95],"on":[96],"tools,":[98],"when":[99],"combined":[100],"repair":[102],"data,":[104],"has":[105],"potential":[107],"unlock":[109],"useful":[110],"use":[111],"cases":[112],"humans":[115,140,164,217],"process.":[119,221],"Additionally,":[120],"we":[121,178],"believe":[122],"there":[123],"an":[125],"opportunity":[126],"benefit":[128],"from":[129],"recent":[130],"advances":[131],"active":[133],"learning":[134],"methods":[135],"minimize":[137],"effort":[139],"have":[141],"spend":[143],"verify":[145],"items":[147],"produced":[148],"by":[149],"tools":[150],"humans.":[152],"currently":[155],"no":[156],"end-to-end":[157],"framework":[160,183,213],"that":[161,181,190,207],"systematically":[162],"involves":[163],"pipeline":[168],"regardless":[169],"paper,":[177],"present":[179,200],"opportunities":[180],"could":[184],"offer,":[185],"highlight":[187],"key":[188],"challenges":[189],"need":[191,210],"be":[193],"addressed":[194],"realize":[196],"vision.":[198],"We":[199],"design":[202],"vision":[203],"discuss":[205],"scenarios":[206],"motivate":[208],"for":[211],"judiciously":[215],"assist":[216]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
