{"id":"https://openalex.org/W2100002396","doi":"https://doi.org/10.1145/2641575","title":"A Model-Based Approach for Developing Data Cleansing Solutions","display_name":"A Model-Based Approach for Developing Data Cleansing Solutions","publication_year":2015,"publication_date":"2015-03-02","ids":{"openalex":"https://openalex.org/W2100002396","doi":"https://doi.org/10.1145/2641575","mag":"2100002396"},"language":"en","primary_location":{"id":"doi:10.1145/2641575","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2641575","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039716498","display_name":"Mario Mezzanzanica","orcid":"https://orcid.org/0000-0003-0399-2810"},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Mario Mezzanzanica","raw_affiliation_strings":["Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy","Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy","institution_ids":["https://openalex.org/I66752286"]},{"raw_affiliation_string":"Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy#TAB#","institution_ids":["https://openalex.org/I66752286"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058377858","display_name":"Roberto Boselli","orcid":"https://orcid.org/0000-0002-4574-3137"},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Roberto Boselli","raw_affiliation_strings":["Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy","Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy","institution_ids":["https://openalex.org/I66752286"]},{"raw_affiliation_string":"Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy#TAB#","institution_ids":["https://openalex.org/I66752286"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049259722","display_name":"Mirko Cesarini","orcid":"https://orcid.org/0000-0001-9601-0403"},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Mirko Cesarini","raw_affiliation_strings":["Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy","Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy","institution_ids":["https://openalex.org/I66752286"]},{"raw_affiliation_string":"Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy#TAB#","institution_ids":["https://openalex.org/I66752286"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047827615","display_name":"Fabio Mercorio","orcid":"https://orcid.org/0000-0001-6864-2702"},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Fabio Mercorio","raw_affiliation_strings":["Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy","Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy","institution_ids":["https://openalex.org/I66752286"]},{"raw_affiliation_string":"Department of Statistics and Quantitative Methods, C.R.I.S.P. Research Centre, University of Milano-Bicocca, Italy#TAB#","institution_ids":["https://openalex.org/I66752286"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5039716498"],"corresponding_institution_ids":["https://openalex.org/I66752286"],"apc_list":null,"apc_paid":null,"fwci":4.0059,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.93538656,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"5","issue":"4","first_page":"1","last_page":"28"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9807999730110168,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9739999771118164,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.9719799757003784},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8536992073059082},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5690419673919678},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.529042661190033},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5180357098579407},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4915648400783539},{"id":"https://openalex.org/keywords/asset","display_name":"Asset (computer security)","score":0.4790286123752594},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4663262665271759},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.44716203212738037},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.44094857573509216},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.43729168176651},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.39910265803337097},{"id":"https://openalex.org/keywords/risk-analysis","display_name":"Risk analysis (engineering)","score":0.33644813299179077},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15797138214111328},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.14094817638397217}],"concepts":[{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.9719799757003784},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8536992073059082},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5690419673919678},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.529042661190033},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5180357098579407},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4915648400783539},{"id":"https://openalex.org/C76178495","wikidata":"https://www.wikidata.org/wiki/Q4808784","display_name":"Asset (computer security)","level":2,"score":0.4790286123752594},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4663262665271759},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.44716203212738037},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.44094857573509216},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.43729168176651},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.39910265803337097},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.33644813299179077},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15797138214111328},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.14094817638397217},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2641575","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2641575","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"},{"id":"pmh:oai:boa.unimib.it:10281/79861","is_oa":false,"landing_page_url":"http://hdl.handle.net/10281/79861","pdf_url":null,"source":{"id":"https://openalex.org/S4306401259","display_name":"BOA (University of Milano-Bicocca)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I66752286","host_organization_name":"University of Milano-Bicocca","host_organization_lineage":["https://openalex.org/I66752286"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"No poverty","score":0.5600000023841858,"id":"https://metadata.un.org/sdg/1"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W117906119","https://openalex.org/W125598877","https://openalex.org/W997400855","https://openalex.org/W1499805410","https://openalex.org/W1529568204","https://openalex.org/W1529693658","https://openalex.org/W1539265392","https://openalex.org/W1544828112","https://openalex.org/W1549664537","https://openalex.org/W1555519929","https://openalex.org/W1571692007","https://openalex.org/W1583869287","https://openalex.org/W1587682423","https://openalex.org/W1604958321","https://openalex.org/W1610496399","https://openalex.org/W1611041715","https://openalex.org/W1660264423","https://openalex.org/W1700279323","https://openalex.org/W1966188439","https://openalex.org/W1972065960","https://openalex.org/W1985096808","https://openalex.org/W2004291985","https://openalex.org/W2004929506","https://openalex.org/W2025315843","https://openalex.org/W2026319603","https://openalex.org/W2026324192","https://openalex.org/W2030984626","https://openalex.org/W2033657926","https://openalex.org/W2036616350","https://openalex.org/W2044469685","https://openalex.org/W2046298800","https://openalex.org/W2046769817","https://openalex.org/W2047745978","https://openalex.org/W2053209119","https://openalex.org/W2059009730","https://openalex.org/W2060095702","https://openalex.org/W2060646170","https://openalex.org/W2060939697","https://openalex.org/W2068376489","https://openalex.org/W2079040219","https://openalex.org/W2081186682","https://openalex.org/W2089206172","https://openalex.org/W2094344402","https://openalex.org/W2108991785","https://openalex.org/W2113355641","https://openalex.org/W2150983842","https://openalex.org/W2151892901","https://openalex.org/W2153531471","https://openalex.org/W2161163216","https://openalex.org/W2166873269","https://openalex.org/W2170712852","https://openalex.org/W2171999426","https://openalex.org/W2213504322","https://openalex.org/W2240123196","https://openalex.org/W2289289418","https://openalex.org/W2293763696","https://openalex.org/W2400827012","https://openalex.org/W2561675875","https://openalex.org/W2611414181","https://openalex.org/W2768634154","https://openalex.org/W2797249308","https://openalex.org/W2913459036","https://openalex.org/W6636177537"],"related_works":["https://openalex.org/W962911587","https://openalex.org/W2270762093","https://openalex.org/W3126834064","https://openalex.org/W4200551113","https://openalex.org/W4255072332","https://openalex.org/W2028861106","https://openalex.org/W1754154538","https://openalex.org/W3169246587","https://openalex.org/W4253714063","https://openalex.org/W2984010599"],"abstract_inverted_index":{"The":[0,82,104,121],"data":[1,16,23,29,143,150],"extracted":[2],"from":[3,38],"electronic":[4],"archives":[5],"is":[6,31],"a":[7,55,63,92,128,149],"valuable":[8],"asset;":[9],"however,":[10],"the":[11,14,42,76,86,100,111,116,131,167],"issue":[12],"of":[13,44,78,118,134,160],"(poor)":[15],"quality":[17,30],"should":[18,144],"be":[19,96],"addressed":[20],"before":[21],"performing":[22],"analysis":[24,94,106,171],"and":[25,46,71,163,169],"decision-making":[26],"activities.":[27,81,172],"Poor":[28],"frequently":[32],"cleansed":[33],"using":[34,99],"business":[35,52],"rules":[36,53],"derived":[37],"domain":[39,155],"knowledge.":[40],"Unfortunately,":[41],"process":[43,77],"designing":[45],"implementing":[47],"cleansing":[48,80,87,112,168],"activities":[49,88,113],"based":[50],"on":[51,109,127],"requires":[54],"relevant":[56],"effort.":[57],"In":[58],"this":[59],"article,":[60],"we":[61],"illustrate":[62],"model-based":[64,102],"approach":[65,122],"useful":[66],"to":[67,90,165],"perform":[68,91,166],"inconsistency":[69],"identification":[70],"corrective":[72],"interventions,":[73],"thus":[74],"simplifying":[75],"developing":[79],"article":[83],"shows":[84],"how":[85,110,142],"required":[89],"sensitivity":[93,105,170],"can":[95,114],"easily":[97],"developed":[98],"proposed":[101],"approach.":[103],"provides":[107],"insights":[108],"affect":[115],"results":[117],"indicators":[119],"computation.":[120],"has":[123],"been":[124],"successfully":[125],"used":[126,164],"database":[129],"describing":[130],"working":[132],"histories":[133],"an":[135],"Italian":[136],"area":[137],"population.":[138],"A":[139],"model":[140],"formalizing":[141],"evolve":[145],"over":[146],"time":[147],"(i.e.,":[148],"consistency":[151],"model)":[152],"in":[153],"such":[154],"was":[156],"created":[157],"(by":[158],"means":[159],"formal":[161],"methods)":[162]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
