{"id":"https://openalex.org/W4401352353","doi":"https://doi.org/10.14778/3665844.3665862","title":"Sparcle: Boosting the Accuracy of Data Cleaning Systems through Spatial Awareness","display_name":"Sparcle: Boosting the Accuracy of Data Cleaning Systems through Spatial Awareness","publication_year":2024,"publication_date":"2024-05-01","ids":{"openalex":"https://openalex.org/W4401352353","doi":"https://doi.org/10.14778/3665844.3665862"},"language":"en","primary_location":{"id":"doi:10.14778/3665844.3665862","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3665844.3665862","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102577845","display_name":"Yuchuan Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuchuan Huang","raw_affiliation_strings":["University of Minnesota, USA"],"affiliations":[{"raw_affiliation_string":"University of Minnesota, USA","institution_ids":["https://openalex.org/I130238516"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007454642","display_name":"Mohamed F. Mokbel","orcid":"https://orcid.org/0000-0002-6686-1757"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohamed F. Mokbel","raw_affiliation_strings":["University of Minnesota, USA"],"affiliations":[{"raw_affiliation_string":"University of Minnesota, USA","institution_ids":["https://openalex.org/I130238516"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5102577845"],"corresponding_institution_ids":["https://openalex.org/I130238516"],"apc_list":null,"apc_paid":null,"fwci":0.4301,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.71715275,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"17","issue":"9","first_page":"2349","last_page":"2362"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9692999720573425,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.810855507850647},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5034007430076599},{"id":"https://openalex.org/keywords/spatial-analysis","display_name":"Spatial analysis","score":0.4512998163700104},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3406216502189636},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3319890797138214},{"id":"https://openalex.org/keywords/remote-sensing","display_name":"Remote sensing","score":0.19727253913879395},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.1695409119129181}],"concepts":[{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.810855507850647},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5034007430076599},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.4512998163700104},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3406216502189636},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3319890797138214},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.19727253913879395},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.1695409119129181}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3665844.3665862","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3665844.3665862","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5600000023841858,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2186686397","https://openalex.org/W2751571889","https://openalex.org/W2964029185","https://openalex.org/W2983641625","https://openalex.org/W3045786285","https://openalex.org/W4224288101","https://openalex.org/W4293582904","https://openalex.org/W6644887767"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W4231274751","https://openalex.org/W1549363203","https://openalex.org/W2154063878","https://openalex.org/W2556012038","https://openalex.org/W1489772951","https://openalex.org/W1538046993"],"abstract_inverted_index":{"Though":[0],"data":[1,31,112,173,189],"cleaning":[2,32,113,174,190],"systems":[3,33,114,191],"have":[4,81],"earned":[5],"great":[6],"success":[7],"and":[8,14,86,139,176,178],"wide":[9],"spread":[10],"in":[11],"both":[12],"academia":[13],"industry,":[15],"they":[16,152],"fall":[17],"short":[18],"when":[19,192],"trying":[20],"to":[21,48,58,91,127],"clean":[22],"spatial":[23,67,104,132,195],"data.":[24,196],"The":[25],"main":[26,117],"reason":[27],"is":[28,42,73,89,125],"that":[29,50,69,77,102,182],"state-of-the-art":[30,172],"mainly":[34],"rely":[35],"on":[36,158],"functional":[37],"dependency":[38,155],"rules":[39],"where":[40,123,144],"there":[41,72],"sufficient":[43],"co-occurrence":[44,88,124],"of":[45,54,62,110,150,168,188],"value":[46,53,61],"pairs":[47],"learn":[49],"a":[51,59,99,130,154,165,171],"certain":[52,131],"an":[55],"attribute":[56],"leads":[57],"corresponding":[60],"another":[63],"attribute.":[64],"However,":[65],"for":[66],"attributes":[68],"represent":[70],"locations,":[71],"very":[74],"little":[75],"chance":[76],"two":[78,116],"records":[79,145],"would":[80],"the":[82,107,186],"same":[83,136],"exact":[84,137],"coordinates,":[85],"hence":[87],"unlikely":[90],"exist.":[92],"This":[93],"paper":[94],"presents":[95],"Sparcle":[96,169,183],"(SPatially-AwaRe":[97],"CLEaning);":[98],"novel":[100],"framework":[101],"injects":[103],"awareness":[105],"into":[106],"core":[108],"engine":[109],"rule-based":[111],"through":[115],"concepts:":[118],"(1)":[119],"Spatial":[120],"Neighborhood":[121],",":[122,143],"relaxed":[126],"be":[128],"within":[129],"proximity":[133],"rather":[134],"than":[135],"value,":[138],"(2)":[140],"Distance":[141],"Weighting":[142],"are":[146],"given":[147],"different":[148],"weights":[149],"whether":[151],"satisfy":[153],"rule,":[156],"based":[157],"their":[159],"relative":[160],"distance.":[161],"Experimental":[162],"results":[163],"using":[164],"real":[166,177],"deployment":[167],"inside":[170],"system,":[175],"synthetic":[179],"datasets,":[180],"show":[181],"significantly":[184],"boosts":[185],"accuracy":[187],"dealing":[193],"with":[194]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
