{"id":"https://openalex.org/W3008237850","doi":"https://doi.org/10.1145/3366030.3366103","title":"An Application of Distributed Data Mining to Identify Data Quality Problems","display_name":"An Application of Distributed Data Mining to Identify Data Quality Problems","publication_year":2019,"publication_date":"2019-12-02","ids":{"openalex":"https://openalex.org/W3008237850","doi":"https://doi.org/10.1145/3366030.3366103","mag":"3008237850"},"language":"en","primary_location":{"id":"doi:10.1145/3366030.3366103","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3366030.3366103","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on Information Integration and Web-based Applications &amp; Services","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061070736","display_name":"Eshref Januzaj","orcid":null},"institutions":[{"id":"https://openalex.org/I174004417","display_name":"Munich University of Applied Sciences","ror":"https://ror.org/012k1v959","country_code":"DE","type":"education","lineage":["https://openalex.org/I174004417"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Eshref Januzaj","raw_affiliation_strings":["Munich University of Applied Sciences, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Munich University of Applied Sciences, Munich, Germany","institution_ids":["https://openalex.org/I174004417"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057674491","display_name":"Visar Januzaj","orcid":"https://orcid.org/0000-0002-5449-3248"},"institutions":[{"id":"https://openalex.org/I4829542","display_name":"RheinMain University of Applied Sciences","ror":"https://ror.org/0378gm372","country_code":"DE","type":"education","lineage":["https://openalex.org/I4829542"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Visar Januzaj","raw_affiliation_strings":["RheinMain University of Applied Sciences, R\u00fcsselsheim, Germany"],"affiliations":[{"raw_affiliation_string":"RheinMain University of Applied Sciences, R\u00fcsselsheim, Germany","institution_ids":["https://openalex.org/I4829542"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042685867","display_name":"Peter Mandl","orcid":"https://orcid.org/0000-0003-4508-7667"},"institutions":[{"id":"https://openalex.org/I174004417","display_name":"Munich University of Applied Sciences","ror":"https://ror.org/012k1v959","country_code":"DE","type":"education","lineage":["https://openalex.org/I174004417"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Peter Mandl","raw_affiliation_strings":["Munich University of Applied Sciences, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Munich University of Applied Sciences, Munich, Germany","institution_ids":["https://openalex.org/I174004417"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5061070736"],"corresponding_institution_ids":["https://openalex.org/I174004417"],"apc_list":null,"apc_paid":null,"fwci":0.1981,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62031711,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":null,"issue":null,"first_page":"418","last_page":"422"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-warehouse","display_name":"Data warehouse","score":0.792519748210907},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7774219512939453},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.7646781802177429},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6644290685653687},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.6448174715042114},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.5491833090782166},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.4987919330596924},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4929332733154297},{"id":"https://openalex.org/keywords/distributed-database","display_name":"Distributed database","score":0.49232757091522217},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43287891149520874},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2758989930152893},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.16960874199867249},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10830584168434143}],"concepts":[{"id":"https://openalex.org/C135572916","wikidata":"https://www.wikidata.org/wiki/Q193351","display_name":"Data warehouse","level":2,"score":0.792519748210907},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7774219512939453},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.7646781802177429},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6644290685653687},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.6448174715042114},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.5491833090782166},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.4987919330596924},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4929332733154297},{"id":"https://openalex.org/C70061542","wikidata":"https://www.wikidata.org/wiki/Q989016","display_name":"Distributed database","level":2,"score":0.49232757091522217},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43287891149520874},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2758989930152893},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.16960874199867249},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10830584168434143},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3366030.3366103","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3366030.3366103","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on Information Integration and Web-based Applications &amp; Services","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4099999964237213,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W30294385","https://openalex.org/W119778216","https://openalex.org/W1539265392","https://openalex.org/W1592068762","https://openalex.org/W1596290048","https://openalex.org/W1610496399","https://openalex.org/W2008896880","https://openalex.org/W2059009730","https://openalex.org/W2061401868","https://openalex.org/W2125943921","https://openalex.org/W2128951504","https://openalex.org/W2140190241","https://openalex.org/W2394938466","https://openalex.org/W2543002526","https://openalex.org/W3096155647","https://openalex.org/W6636177537"],"related_works":["https://openalex.org/W2384379346","https://openalex.org/W2901826185","https://openalex.org/W3119296476","https://openalex.org/W2742521120","https://openalex.org/W2159392828","https://openalex.org/W2353586736","https://openalex.org/W2189235061","https://openalex.org/W2353590449","https://openalex.org/W188368245","https://openalex.org/W2181930696"],"abstract_inverted_index":{"When":[0],"dealing":[1],"with":[2,23,29,44],"huge":[3],"data":[4,12,16,36,52,67,72,77,109,124,128],"sets,":[5],"during":[6],"the":[7,30,48,76,92,96,103,112,122],"integration":[8,49],"process":[9,100],"of":[10,34,50,75],"distributed":[11,51,66,80,104,113],"into":[13],"a":[14,71,88],"single":[15],"warehouse,":[17],"one":[18],"is":[19,54],"not":[20],"only":[21],"confronted":[22],"time":[24],"and":[25],"security":[26],"factors":[27],"but":[28],"well":[31],"known":[32],"problem":[33],"low":[35],"quality":[37,73,83],"as":[38],"well.":[39],"In":[40],"order":[41],"to":[42,69,118],"cope":[43],"such":[45],"issues":[46],"that":[47,64,111],"often":[53],"faced":[55],"with,":[56],"we":[57],"present":[58],"in":[59,78],"this":[60],"paper":[61],"an":[62],"approach":[63],"applies":[65],"mining,":[68],"facilitate":[70],"analysis":[74,114],"their":[79],"state.":[81],"Data":[82],"problems":[84],"are":[85,116],"identified":[86],"by":[87],"classifier,":[89],"which":[90],"uses":[91],"knowledge":[93],"gained":[94],"from":[95],"clustering":[97],"(subspace":[98],"clustering)":[99],"performed":[101],"on":[102,107,121],"data.":[105],"Experiments":[106],"real":[108],"show":[110],"results":[115],"comparable":[117],"those":[119],"conducted":[120],"central":[123],"warehouse":[125],"using":[126],"classical":[127],"mining.":[129]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
