{"id":"https://openalex.org/W2053119626","doi":"https://doi.org/10.5220/0005004901890201","title":"Improving Data Cleansing Accuracy - A Model-based Approach","display_name":"Improving Data Cleansing Accuracy - A Model-based Approach","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2053119626","doi":"https://doi.org/10.5220/0005004901890201","mag":"2053119626"},"language":"en","primary_location":{"id":"doi:10.5220/0005004901890201","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0005004901890201","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of 3rd International Conference on Data Management Technologies and Applications","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.5220/0005004901890201","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039716498","display_name":"Mario Mezzanzanica","orcid":"https://orcid.org/0000-0003-0399-2810"},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Mario Mezzanzanica","raw_affiliation_strings":["University of Milan-Bicocca, Italy"],"affiliations":[{"raw_affiliation_string":"University of Milan-Bicocca, Italy","institution_ids":["https://openalex.org/I66752286"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058377858","display_name":"Roberto Boselli","orcid":"https://orcid.org/0000-0002-4574-3137"},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Roberto Boselli","raw_affiliation_strings":["University of Milan-Bicocca, Italy"],"affiliations":[{"raw_affiliation_string":"University of Milan-Bicocca, Italy","institution_ids":["https://openalex.org/I66752286"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049259722","display_name":"Mirko Cesarini","orcid":"https://orcid.org/0000-0001-9601-0403"},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Mirko Cesarini","raw_affiliation_strings":["University of Milan-Bicocca, Italy"],"affiliations":[{"raw_affiliation_string":"University of Milan-Bicocca, Italy","institution_ids":["https://openalex.org/I66752286"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047827615","display_name":"Fabio Mercorio","orcid":"https://orcid.org/0000-0001-6864-2702"},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Fabio Mercorio","raw_affiliation_strings":["University of Milano-Bicocca, Italy"],"affiliations":[{"raw_affiliation_string":"University of Milano-Bicocca, Italy","institution_ids":["https://openalex.org/I66752286"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5039716498"],"corresponding_institution_ids":["https://openalex.org/I66752286"],"apc_list":null,"apc_paid":null,"fwci":0.4204,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.70173943,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"189","last_page":"201"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9462000131607056,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.9179248809814453},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8240835666656494},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6619043350219727},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6336911916732788},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5953914523124695},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5945666432380676},{"id":"https://openalex.org/keywords/upload","display_name":"Upload","score":0.5542395710945129},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5165364146232605},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.49120256304740906},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.4860461354255676},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.47926661372184753},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.45538026094436646},{"id":"https://openalex.org/keywords/subject-matter-expert","display_name":"Subject-matter expert","score":0.42975562810897827},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.4143028259277344},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4110986590385437},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3466540575027466},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32791250944137573},{"id":"https://openalex.org/keywords/expert-system","display_name":"Expert system","score":0.13556396961212158},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10086050629615784}],"concepts":[{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.9179248809814453},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8240835666656494},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6619043350219727},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6336911916732788},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5953914523124695},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5945666432380676},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.5542395710945129},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5165364146232605},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.49120256304740906},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.4860461354255676},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.47926661372184753},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.45538026094436646},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.42975562810897827},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.4143028259277344},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4110986590385437},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3466540575027466},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32791250944137573},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.13556396961212158},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10086050629615784},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.5220/0005004901890201","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0005004901890201","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of 3rd International Conference on Data Management Technologies and Applications","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.853.7388","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.853.7388","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://boa.unimib.it/retrieve/handle/10281/52825/79452/DATA2014.pdf","raw_type":"text"},{"id":"pmh:oai:boa.unimib.it:10281/52825","is_oa":true,"landing_page_url":"http://hdl.handle.net/10281/52825","pdf_url":null,"source":{"id":"https://openalex.org/S4306401259","display_name":"BOA (University of Milano-Bicocca)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I66752286","host_organization_name":"University of Milano-Bicocca","host_organization_lineage":["https://openalex.org/I66752286"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.5220/0005004901890201","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0005004901890201","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of 3rd International Conference on Data Management Technologies and Applications","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W94547874","https://openalex.org/W105202040","https://openalex.org/W109885913","https://openalex.org/W622135080","https://openalex.org/W1486210143","https://openalex.org/W1539265392","https://openalex.org/W1544828112","https://openalex.org/W1567491469","https://openalex.org/W1571692007","https://openalex.org/W1604958321","https://openalex.org/W1610496399","https://openalex.org/W1611041715","https://openalex.org/W1660264423","https://openalex.org/W1668408280","https://openalex.org/W1680392829","https://openalex.org/W1700279323","https://openalex.org/W1813492422","https://openalex.org/W1980224569","https://openalex.org/W1997952460","https://openalex.org/W2004291985","https://openalex.org/W2026324192","https://openalex.org/W2033626294","https://openalex.org/W2044022210","https://openalex.org/W2046298800","https://openalex.org/W2046769817","https://openalex.org/W2046977065","https://openalex.org/W2047745978","https://openalex.org/W2053209119","https://openalex.org/W2060939697","https://openalex.org/W2065753378","https://openalex.org/W2089206172","https://openalex.org/W2094344402","https://openalex.org/W2108991785","https://openalex.org/W2119111481","https://openalex.org/W2137775416","https://openalex.org/W2140436802","https://openalex.org/W2150983842","https://openalex.org/W2161163216","https://openalex.org/W2163600218","https://openalex.org/W2183508320","https://openalex.org/W2213504322","https://openalex.org/W2240123196","https://openalex.org/W2261688869","https://openalex.org/W2294655953","https://openalex.org/W2400827012","https://openalex.org/W2400958823","https://openalex.org/W2576958202","https://openalex.org/W2970113856","https://openalex.org/W3000214033","https://openalex.org/W3146259567","https://openalex.org/W4244517071"],"related_works":["https://openalex.org/W962911587","https://openalex.org/W4200551113","https://openalex.org/W2270762093","https://openalex.org/W2028861106","https://openalex.org/W4255072332","https://openalex.org/W3126834064","https://openalex.org/W3110671107","https://openalex.org/W2357854711","https://openalex.org/W1754154538","https://openalex.org/W4243448361"],"abstract_inverted_index":{"Abstract:":[0],"Research":[1],"on":[2,102,150],"data":[3,32,50,68],"quality":[4,48],"is":[5,90],"growing":[6],"in":[7,9],"importance":[8],"both":[10],"industrial":[11],"and":[12,107,148],"academic":[13],"communities,":[14],"as":[15],"it":[16],"aims":[17],"at":[18],"deriving":[19],"knowledge":[20,61],"(and":[21],"then":[22],"value)":[23],"from":[24,136],"data.":[25],"Information":[26],"Systems":[27],"generate":[28],"a":[29,51,66,76,88,116,155,172],"lot":[30],"of":[31,38,49,58,105,113,130,158,177],"useful":[33],"for":[34,54,92,185,197],"studying":[35],"the":[36,47,56,59,103,110,122,127,137,159,175,182,189],"dynamics":[37],"subjects":[39],"\u2019":[40],"behaviours":[41],"or":[42],"phenomena":[43],"over":[44],"time,":[45],"making":[46],"crucial":[52],"aspect":[53],"guaranteeing":[55],"believability":[57],"overall":[60],"discovery":[62],"process.":[63],"In":[64],"such":[65],"scenario,":[67],"cleansing":[69,133],"techniques,":[70],"i.e.,":[71],"automatic":[72,111,128],"methods":[73],"to":[74,139],"cleanse":[75],"dirty":[77],"dataset,":[78,154],"are":[79,86],"paramount.":[80],"However,":[81],"when":[82],"multiple":[83],"cleans-ing":[84],"alternatives":[85],"available":[87,196],"policy":[89,97,134,186],"required":[91],"choosing":[93],"between":[94],"them.":[95],"The":[96,142],"design":[98],"task":[99],"still":[100],"relies":[101],"experience":[104],"domain-experts,":[106],"this":[108],"makes":[109],"identification":[112],"accurate":[114,132],"policies":[115],"signifi-cant":[117],"issue.":[118],"This":[119],"paper":[120],"extends":[121],"Universal":[123],"Cleaning":[124],"Process":[125],"enabling":[126],"generation":[129,176],"an":[131,151],"derived":[135],"dataset":[138],"be":[140],"analysed.":[141],"proposed":[143],"approach":[144,169],"has":[145],"been":[146,193],"implemented":[147],"tested":[149],"on-line":[152],"benchmark":[153],"real-world":[156],"instance":[157],"Labour":[160],"Market":[161],"Domain.":[162],"Our":[163],"preliminary":[164],"results":[165,191],"show":[166],"that":[167],"our":[168],"would":[170],"represent":[171],"contribution":[173],"towards":[174],"data-driven":[178],"policy,":[179],"reducing":[180],"significantly":[181],"domain-experts":[183],"intervention":[184],"specification.":[187],"Finally,":[188],"generated":[190],"have":[192],"made":[194],"publicly":[195],"downloading.":[198],"1":[199]},"counts_by_year":[{"year":2020,"cited_by_count":2},{"year":2015,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
