{"id":"https://openalex.org/W3033117735","doi":"https://doi.org/10.3390/data5020050","title":"Data Wrangling in Database Systems: Purging of Dirty Data","display_name":"Data Wrangling in Database Systems: Purging of Dirty Data","publication_year":2020,"publication_date":"2020-06-05","ids":{"openalex":"https://openalex.org/W3033117735","doi":"https://doi.org/10.3390/data5020050","mag":"3033117735"},"language":"en","primary_location":{"id":"doi:10.3390/data5020050","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data5020050","pdf_url":"https://www.mdpi.com/2306-5729/5/2/50/pdf?version=1591342877","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2306-5729/5/2/50/pdf?version=1591342877","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008817869","display_name":"Otmane Azeroual","orcid":"https://orcid.org/0000-0002-5225-389X"},"institutions":[{"id":"https://openalex.org/I4210111137","display_name":"German Centre for Higher Education Research and Science Studies","ror":"https://ror.org/01n8j6z65","country_code":"DE","type":"government","lineage":["https://openalex.org/I4210111137"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Otmane Azeroual","raw_affiliation_strings":["German Center for Higher Education Research and Science Studies (DZHW), Sch\u00fctzenstra\u00dfe 6a, Berlin 10117, Germany"],"raw_orcid":"https://orcid.org/0000-0002-5225-389X","affiliations":[{"raw_affiliation_string":"German Center for Higher Education Research and Science Studies (DZHW), Sch\u00fctzenstra\u00dfe 6a, Berlin 10117, Germany","institution_ids":["https://openalex.org/I4210111137"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5008817869"],"corresponding_institution_ids":["https://openalex.org/I4210111137"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":4.8302,"has_fulltext":false,"cited_by_count":41,"citation_normalized_percentile":{"value":0.95253772,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"5","issue":"2","first_page":"50","last_page":"50"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9761000275611877,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/raw-data","display_name":"Raw data","score":0.7879918813705444},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.7692664861679077},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6494700908660889},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.5291308164596558},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.5227141976356506},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4882245361804962},{"id":"https://openalex.org/keywords/theme","display_name":"Theme (computing)","score":0.47501978278160095},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43869900703430176},{"id":"https://openalex.org/keywords/data-processing","display_name":"Data processing","score":0.4226973056793213},{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.419463574886322},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14591780304908752},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.14279231429100037},{"id":"https://openalex.org/keywords/operations-management","display_name":"Operations management","score":0.06104934215545654}],"concepts":[{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.7879918813705444},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.7692664861679077},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6494700908660889},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5291308164596558},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5227141976356506},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4882245361804962},{"id":"https://openalex.org/C33566652","wikidata":"https://www.wikidata.org/wiki/Q1065927","display_name":"Theme (computing)","level":2,"score":0.47501978278160095},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43869900703430176},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.4226973056793213},{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.419463574886322},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14591780304908752},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.14279231429100037},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.06104934215545654},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/data5020050","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data5020050","pdf_url":"https://www.mdpi.com/2306-5729/5/2/50/pdf?version=1591342877","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:gam:jdataj:v:5:y:2020:i:2:p:50-:d:367592","is_oa":false,"landing_page_url":"https://www.mdpi.com/2306-5729/5/2/50/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:doaj.org/article:1c49663e0d8841d4a31149e97d004c24","is_oa":true,"landing_page_url":"https://doaj.org/article/1c49663e0d8841d4a31149e97d004c24","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data, Vol 5, Iss 2, p 50 (2020)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2306-5729/5/2/50/","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3390/data5020050","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/data5020050","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data5020050","pdf_url":"https://www.mdpi.com/2306-5729/5/2/50/pdf?version=1591342877","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4399999976158142,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3033117735.pdf","grobid_xml":"https://content.openalex.org/works/W3033117735.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W246024570","https://openalex.org/W1567491469","https://openalex.org/W1966188439","https://openalex.org/W2000237244","https://openalex.org/W2019130209","https://openalex.org/W2064766209","https://openalex.org/W2068376489","https://openalex.org/W2143677795","https://openalex.org/W2484398893","https://openalex.org/W2794656727","https://openalex.org/W2798218576","https://openalex.org/W2914665335","https://openalex.org/W2919119516","https://openalex.org/W2955435148","https://openalex.org/W6750872235"],"related_works":["https://openalex.org/W962911587","https://openalex.org/W4200551113","https://openalex.org/W2270762093","https://openalex.org/W2028861106","https://openalex.org/W4255072332","https://openalex.org/W3126834064","https://openalex.org/W3110671107","https://openalex.org/W2084395052","https://openalex.org/W1754154538","https://openalex.org/W3033117735"],"abstract_inverted_index":{"Researchers":[0],"need":[1],"to":[2,5,29,36,109,116,122,133,183],"be":[3,148,158,178],"able":[4],"integrate":[6],"ever-increasing":[7],"amounts":[8],"of":[9,16,22,34,47,62,94,104,129,137],"data":[10,35,41,49,79,96,105,111,114,120,130,139,170,186,189],"into":[11],"their":[12,43,141,192],"institutional":[13],"databases,":[14],"regardless":[15],"the":[17,23,31,74,95,101,118,169],"source,":[18],"format,":[19],"or":[20,113],"size":[21],"data.":[24],"It":[25],"is":[26,97,132],"then":[27,155],"necessary":[28],"use":[30],"increasing":[32],"diversity":[33],"derive":[37],"greater":[38],"value":[39],"from":[40,187],"for":[42,76,150],"organization.":[44],"The":[45,92,127],"processing":[46],"electronic":[48],"plays":[50],"a":[51,59,87,135],"central":[52],"role":[53],"in":[54,65,140,180],"modern":[55],"society.":[56],"Data":[57],"constitute":[58],"fundamental":[60],"part":[61],"operational":[63],"processes":[64],"companies":[66],"and":[67,85,121,124,174,194],"scientific":[68],"organizations.":[69],"In":[70],"addition,":[71],"they":[72,146],"form":[73],"basis":[75],"decisions.":[77],"Bad":[78],"quality":[80,93],"can":[81,147,156,177],"negatively":[82],"affect":[83],"decisions":[84],"have":[86],"negative":[88],"impact":[89],"on":[90],"results.":[91],"crucial.":[98],"This":[99,165],"includes":[100],"new":[102],"theme":[103],"wrangling,":[106],"sometimes":[107],"referred":[108],"as":[110],"munging":[112],"crunching,":[115],"find":[117],"dirty":[119],"transform":[123],"clean":[125,184],"them.":[126],"aim":[128],"wrangling":[131,171],"prepare":[134],"lot":[136],"raw":[138],"original":[142],"state":[143],"so":[144],"that":[145,160],"used":[149,179],"further":[151],"analysis":[152],"steps.":[153],"Only":[154],"knowledge":[157],"obtained":[159],"may":[161],"bring":[162],"added":[163],"value.":[164],"paper":[166],"shows":[167],"how":[168,175],"process":[172],"works":[173],"it":[176],"database":[181],"systems":[182],"up":[185],"heterogeneous":[188],"sources":[190],"during":[191],"acquisition":[193],"integration.":[195]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2025-10-10T00:00:00"}
