{"id":"https://openalex.org/W2133160781","doi":"https://doi.org/10.1080/713827180","title":"Data preparation for data mining","display_name":"Data preparation for data mining","publication_year":2003,"publication_date":"2003-05-01","ids":{"openalex":"https://openalex.org/W2133160781","doi":"https://doi.org/10.1080/713827180","mag":"2133160781"},"language":"en","primary_location":{"id":"doi:10.1080/713827180","is_oa":false,"landing_page_url":"https://doi.org/10.1080/713827180","pdf_url":null,"source":{"id":"https://openalex.org/S125501549","display_name":"Applied Artificial Intelligence","issn_l":"0883-9514","issn":["0883-9514","1087-6545"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Applied Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100764178","display_name":"Shichao Zhang","orcid":"https://orcid.org/0000-0001-9981-2970"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Shichao Zhang","raw_affiliation_strings":["\n Faculty of Information Technology, University of Technology, Sydney, Australia","Faculty of Information Technology, University of Technology Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"\n Faculty of Information Technology, University of Technology, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"Faculty of Information Technology, University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100438525","display_name":"Chengqi Zhang","orcid":"https://orcid.org/0000-0001-5715-7154"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Chengqi Zhang","raw_affiliation_strings":["\n Faculty of Information Technology, University of Technology, Sydney, Australia","Faculty of Information Technology, University of Technology Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"\n Faculty of Information Technology, University of Technology, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"Faculty of Information Technology, University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100636286","display_name":"Qiang Yang","orcid":"https://orcid.org/0000-0001-5059-8360"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Qiang Yang","raw_affiliation_strings":["\n Computer Science Department, Hong Kong University of Science and Technology, Kowloon, Hong Kong, China","Computer Science Department , Hong Kong University of Science and Technology , Kowloon, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"\n Computer Science Department, Hong Kong University of Science and Technology, Kowloon, Hong Kong, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]},{"raw_affiliation_string":"Computer Science Department , Hong Kong University of Science and Technology , Kowloon, Hong Kong, China","institution_ids":["https://openalex.org/I200769079"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100764178"],"corresponding_institution_ids":["https://openalex.org/I114017466"],"apc_list":{"value":2195,"currency":"USD","value_usd":2195},"apc_paid":null,"fwci":9.5873,"has_fulltext":false,"cited_by_count":507,"citation_normalized_percentile":{"value":0.97583502,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"17","issue":"5-6","first_page":"375","last_page":"381"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8559283018112183},{"id":"https://openalex.org/keywords/raw-data","display_name":"Raw data","score":0.7023845314979553},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.5972480177879333},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.559739887714386},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5134959816932678},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4545544385910034},{"id":"https://openalex.org/keywords/data-analysis","display_name":"Data analysis","score":0.4447072446346283},{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.44350534677505493},{"id":"https://openalex.org/keywords/data-warehouse","display_name":"Data warehouse","score":0.4242938756942749},{"id":"https://openalex.org/keywords/profit","display_name":"Profit (economics)","score":0.416208416223526}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8559283018112183},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.7023845314979553},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5972480177879333},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.559739887714386},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5134959816932678},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4545544385910034},{"id":"https://openalex.org/C175801342","wikidata":"https://www.wikidata.org/wiki/Q1988917","display_name":"Data analysis","level":2,"score":0.4447072446346283},{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.44350534677505493},{"id":"https://openalex.org/C135572916","wikidata":"https://www.wikidata.org/wiki/Q193351","display_name":"Data warehouse","level":2,"score":0.4242938756942749},{"id":"https://openalex.org/C181622380","wikidata":"https://www.wikidata.org/wiki/Q26911","display_name":"Profit (economics)","level":2,"score":0.416208416223526},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1080/713827180","is_oa":false,"landing_page_url":"https://doi.org/10.1080/713827180","pdf_url":null,"source":{"id":"https://openalex.org/S125501549","display_name":"Applied Artificial Intelligence","issn_l":"0883-9514","issn":["0883-9514","1087-6545"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Applied Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.106.2852","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.106.2852","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ftp.cse.ust.hk/~qyang/Docs/2003/s1.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.542.3303","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.542.3303","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://cs.nju.edu.cn/zhouzh/zhouzh.files/course/dm/reading/reading03/zhang_aai03.pdf","raw_type":"text"},{"id":"pmh:oai:opus.lib.uts.edu.au:10453/4062","is_oa":false,"landing_page_url":"http://hdl.handle.net/10453/4062","pdf_url":null,"source":{"id":"https://openalex.org/S4306401357","display_name":"UTS ePRESS (University of Technology Sydney)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114017466","host_organization_name":"University of Technology Sydney","host_organization_lineage":["https://openalex.org/I114017466"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5899999737739563,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1983479840","https://openalex.org/W1985882873","https://openalex.org/W2005861721","https://openalex.org/W2043922977","https://openalex.org/W2058420708","https://openalex.org/W2084691665","https://openalex.org/W2095000556","https://openalex.org/W2133006565","https://openalex.org/W2137494120","https://openalex.org/W2140677665","https://openalex.org/W2151719682"],"related_works":["https://openalex.org/W2135633991","https://openalex.org/W962911587","https://openalex.org/W2270762093","https://openalex.org/W3126834064","https://openalex.org/W2901826185","https://openalex.org/W2353586736","https://openalex.org/W2735063176","https://openalex.org/W69955301","https://openalex.org/W2181930696","https://openalex.org/W2593811201"],"abstract_inverted_index":{"Data":[0],"preparation":[1,72],"is":[2,15],"a":[3,10],"fundamental":[4],"stage":[5],"of":[6,12,70,84,93],"data":[7,19,36,54,71,74,85],"analysis.":[8],"While":[9],"lot":[11],"low-quality":[13],"information":[14],"available":[16],"in":[17,31,73,81],"various":[18],"sources":[20],"and":[21,95],"on":[22],"the":[23,35,59,68,82],"Web,":[24],"many":[25],"organizations":[26],"or":[27],"companies":[28],"are":[29],"interested":[30],"how":[32],"to":[33],"transform":[34],"into":[37],"cleaned":[38],"forms":[39],"which":[40],"can":[41],"be":[42],"used":[43],"for":[44,53],"high-profit":[45],"purposes.":[46],"This":[47],"goal":[48],"generates":[49],"an":[50],"urgent":[51],"need":[52],"analysis":[55],"aimed":[56],"at":[57],"cleaning":[58],"raw":[60],"data.":[61],"In":[62],"this":[63],"paper,":[64],"we":[65,88],"first":[66],"show":[67],"importance":[69],"analysis,":[75],"then":[76],"introduce":[77],"some":[78,90],"research":[79,94],"achievements":[80],"area":[83],"preparation.":[86],"Finally,":[87],"suggest":[89],"future":[91],"directions":[92],"development":[96]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":32},{"year":2024,"cited_by_count":27},{"year":2023,"cited_by_count":45},{"year":2022,"cited_by_count":59},{"year":2021,"cited_by_count":57},{"year":2020,"cited_by_count":53},{"year":2019,"cited_by_count":46},{"year":2018,"cited_by_count":29},{"year":2017,"cited_by_count":16},{"year":2016,"cited_by_count":26},{"year":2015,"cited_by_count":21},{"year":2014,"cited_by_count":16},{"year":2013,"cited_by_count":10},{"year":2012,"cited_by_count":20}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
