{"id":"https://openalex.org/W3111439114","doi":"https://doi.org/10.1145/3444831.3444835","title":"Data Preparation","display_name":"Data Preparation","publication_year":2020,"publication_date":"2020-12-17","ids":{"openalex":"https://openalex.org/W3111439114","doi":"https://doi.org/10.1145/3444831.3444835","mag":"3111439114"},"language":"en","primary_location":{"id":"doi:10.1145/3444831.3444835","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3444831.3444835","pdf_url":null,"source":{"id":"https://openalex.org/S47508943","display_name":"ACM SIGMOD Record","issn_l":"0163-5808","issn":["0163-5808","1943-5835"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGMOD Record","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032761397","display_name":"Mazhar Hameed","orcid":"https://orcid.org/0000-0002-4821-4417"},"institutions":[{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Mazhar Hameed","raw_affiliation_strings":["University of Potsdam, Potsdam, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Potsdam, Potsdam, Germany","institution_ids":["https://openalex.org/I176453806"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053028480","display_name":"Felix Naumann","orcid":"https://orcid.org/0000-0002-4483-1389"},"institutions":[{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Felix Naumann","raw_affiliation_strings":["University of Potsdam, Potsdam, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Potsdam, Potsdam, Germany","institution_ids":["https://openalex.org/I176453806"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5032761397"],"corresponding_institution_ids":["https://openalex.org/I176453806"],"apc_list":null,"apc_paid":null,"fwci":4.6466,"has_fulltext":false,"cited_by_count":47,"citation_normalized_percentile":{"value":0.95140417,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"49","issue":"3","first_page":"18","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8341420888900757},{"id":"https://openalex.org/keywords/raw-data","display_name":"Raw data","score":0.7743493318557739},{"id":"https://openalex.org/keywords/data-management","display_name":"Data management","score":0.6086399555206299},{"id":"https://openalex.org/keywords/data-virtualization","display_name":"Data virtualization","score":0.6059638857841492},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.5596404671669006},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5303136706352234},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.5163754224777222},{"id":"https://openalex.org/keywords/data-analysis","display_name":"Data analysis","score":0.4832816421985626},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.46924877166748047},{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.42989256978034973},{"id":"https://openalex.org/keywords/data-processing","display_name":"Data processing","score":0.4177160859107971},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3889980912208557},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.3662239909172058},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.36435869336128235},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.1306914985179901}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8341420888900757},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.7743493318557739},{"id":"https://openalex.org/C1668388","wikidata":"https://www.wikidata.org/wiki/Q1149776","display_name":"Data management","level":2,"score":0.6086399555206299},{"id":"https://openalex.org/C80344994","wikidata":"https://www.wikidata.org/wiki/Q5227369","display_name":"Data virtualization","level":4,"score":0.6059638857841492},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5596404671669006},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5303136706352234},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.5163754224777222},{"id":"https://openalex.org/C175801342","wikidata":"https://www.wikidata.org/wiki/Q1988917","display_name":"Data analysis","level":2,"score":0.4832816421985626},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.46924877166748047},{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.42989256978034973},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.4177160859107971},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3889980912208557},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.3662239909172058},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36435869336128235},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.1306914985179901},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C513985346","wikidata":"https://www.wikidata.org/wiki/Q270471","display_name":"Virtualization","level":3,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3444831.3444835","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3444831.3444835","pdf_url":null,"source":{"id":"https://openalex.org/S47508943","display_name":"ACM SIGMOD Record","issn_l":"0163-5808","issn":["0163-5808","1943-5835"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGMOD Record","raw_type":"journal-article"},{"id":"pmh:oai:kobv.de-opus4-uni-potsdam:56984","is_oa":false,"landing_page_url":"https://publishup.uni-potsdam.de/frontdoor/index/index/docId/56984","pdf_url":null,"source":{"id":"https://openalex.org/S4306400594","display_name":"publish.UP (University of Potsdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I176453806","host_organization_name":"University of Potsdam","host_organization_lineage":["https://openalex.org/I176453806"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5899999737739563,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W246024570","https://openalex.org/W1705258959","https://openalex.org/W1992673035","https://openalex.org/W2046298800","https://openalex.org/W2064766209","https://openalex.org/W2086068051","https://openalex.org/W2106895292","https://openalex.org/W2126848435","https://openalex.org/W2133160781","https://openalex.org/W2139646386","https://openalex.org/W2143677795","https://openalex.org/W2197636328","https://openalex.org/W2232417456","https://openalex.org/W2493970676","https://openalex.org/W2544486974","https://openalex.org/W2546694561","https://openalex.org/W2610248523","https://openalex.org/W2612824201","https://openalex.org/W2790130692","https://openalex.org/W2798520647","https://openalex.org/W2811035651","https://openalex.org/W2893303656","https://openalex.org/W2904577797","https://openalex.org/W2914121031","https://openalex.org/W2963707382","https://openalex.org/W2963746773","https://openalex.org/W2970992672","https://openalex.org/W3173700007"],"related_works":["https://openalex.org/W2980555063","https://openalex.org/W3198250195","https://openalex.org/W2508885301","https://openalex.org/W2884078111","https://openalex.org/W2053247611","https://openalex.org/W4214526941","https://openalex.org/W2906330222","https://openalex.org/W3097304313","https://openalex.org/W65529773","https://openalex.org/W3111439114"],"abstract_inverted_index":{"Raw":[0],"data":[1,22,36,42,52,56,65,68,73,81,99,109,128,135,147,159,185],"are":[2,10,23,84,122,171],"often":[3],"messy:":[4],"they":[5],"follow":[6],"different":[7],"encodings,":[8],"records":[9],"not":[11,16,26,70],"well":[12],"structured,":[13],"values":[14],"do":[15],"adhere":[17],"to":[18,28,63,116,143],"patterns,":[19],"etc.":[20],"Such":[21],"in":[24,179],"general":[25],"fit":[27],"be":[29],"ingested":[30],"by":[31,41,112],"downstream":[32],"applications,":[33],"such":[34],"as":[35],"analytics":[37],"tools,":[38,149],"or":[39],"even":[40,161],"management":[43],"systems.":[44],"The":[45],"act":[46],"of":[47,181],"obtaining":[48],"information":[49],"from":[50],"raw":[51],"relies":[53],"on":[54],"some":[55],"preparation":[57,60,82,136,148,186],"process.":[58],"Data":[59],"is":[61,90],"integral":[62],"advanced":[64],"analysis":[66],"and":[67,86,95,101,120,125,168,183,189],"management,":[69],"only":[71],"for":[72,76,93,107,127,157,162],"science":[74],"but":[75,88],"any":[77],"data-driven":[78],"applications.":[79],"Existing":[80],"tools":[83,126,164],"operational":[85],"useful,":[87],"there":[89],"still":[91,172],"room":[92],"improvement":[94],"optimization.":[96],"With":[97],"increasing":[98],"volume":[100],"its":[102],"messy":[103],"nature,":[104],"the":[105,133,155],"demand":[106],"prepared":[108],"increases":[110],"day":[111],"day.":[113],"To":[114,130],"cater":[115],"this":[117],"demand,":[118],"companies":[119],"researchers":[121],"developing":[123],"techniques":[124],"preparation.":[129],"better":[131],"understand":[132],"available":[134],"systems,":[137],"we":[138],"have":[139],"conducted":[140],"a":[141],"survey":[142],"investigate":[144],"(1)":[145],"prominent":[146],"(2)":[150],"distinctive":[151],"tool":[152],"features,":[153],"(3)":[154],"need":[156],"preliminary":[158],"processing":[160],"these":[163],"and,":[165],"(4)":[166],"features":[167],"abilities":[169],"that":[170],"lacking.":[173],"We":[174],"conclude":[175],"with":[176],"an":[177],"argument":[178],"support":[180],"automatic":[182],"intelligent":[184],"beyond":[187],"traditional":[188],"simplistic":[190],"techniques.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2020-12-21T00:00:00"}
