{"id":"https://openalex.org/W2944033008","doi":"https://doi.org/10.1109/is.2018.8710580","title":"Puzzle Methods for Automatic Selection of Data Cleansing Techniques","display_name":"Puzzle Methods for Automatic Selection of Data Cleansing Techniques","publication_year":2018,"publication_date":"2018-09-01","ids":{"openalex":"https://openalex.org/W2944033008","doi":"https://doi.org/10.1109/is.2018.8710580","mag":"2944033008"},"language":"en","primary_location":{"id":"doi:10.1109/is.2018.8710580","is_oa":false,"landing_page_url":"https://doi.org/10.1109/is.2018.8710580","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 International Conference on Intelligent Systems (IS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072972208","display_name":"Pepa Petrova","orcid":"https://orcid.org/0000-0002-0560-0094"},"institutions":[{"id":"https://openalex.org/I190032430","display_name":"University of Library Studies and Information Technologies","ror":"https://ror.org/001nkz398","country_code":"BG","type":"education","lineage":["https://openalex.org/I190032430"]}],"countries":["BG"],"is_corresponding":true,"raw_author_name":"Pepa Petrova","raw_affiliation_strings":["University of Library Studies and IT ULSIT, Sofia, Bulgaria"],"affiliations":[{"raw_affiliation_string":"University of Library Studies and IT ULSIT, Sofia, Bulgaria","institution_ids":["https://openalex.org/I190032430"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074143529","display_name":"Vladimir Jotsov","orcid":"https://orcid.org/0000-0002-2860-7918"},"institutions":[{"id":"https://openalex.org/I190032430","display_name":"University of Library Studies and Information Technologies","ror":"https://ror.org/001nkz398","country_code":"BG","type":"education","lineage":["https://openalex.org/I190032430"]}],"countries":["BG"],"is_corresponding":false,"raw_author_name":"Vladimir Jotsov","raw_affiliation_strings":["University of Library Studies and IT ULSIT, Sofia, Bulgaria"],"affiliations":[{"raw_affiliation_string":"University of Library Studies and IT ULSIT, Sofia, Bulgaria","institution_ids":["https://openalex.org/I190032430"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110947490","display_name":"Vassil Sgurev","orcid":null},"institutions":[{"id":"https://openalex.org/I24768866","display_name":"Bulgarian Academy of Sciences","ror":"https://ror.org/01x8hew03","country_code":"BG","type":"funder","lineage":["https://openalex.org/I24768866"]}],"countries":["BG"],"is_corresponding":false,"raw_author_name":"Vassil Sgurev","raw_affiliation_strings":["Bulgarian Academy of Science, Sofia, Bulgaria"],"affiliations":[{"raw_affiliation_string":"Bulgarian Academy of Science, Sofia, Bulgaria","institution_ids":["https://openalex.org/I24768866"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5072972208"],"corresponding_institution_ids":["https://openalex.org/I190032430"],"apc_list":null,"apc_paid":null,"fwci":0.3234,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.69741589,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"820","last_page":"826"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.9317836165428162},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7865638732910156},{"id":"https://openalex.org/keywords/data-transformation","display_name":"Data transformation","score":0.6689836978912354},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.6549843549728394},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.6087324023246765},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5103475451469421},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.5041674375534058},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4913243353366852},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.4473527669906616},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.32680895924568176},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3250119388103485},{"id":"https://openalex.org/keywords/data-warehouse","display_name":"Data warehouse","score":0.2871728241443634},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.16912317276000977},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.145818829536438},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09527352452278137}],"concepts":[{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.9317836165428162},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7865638732910156},{"id":"https://openalex.org/C150670458","wikidata":"https://www.wikidata.org/wiki/Q4272815","display_name":"Data transformation","level":3,"score":0.6689836978912354},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6549843549728394},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.6087324023246765},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5103475451469421},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.5041674375534058},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4913243353366852},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.4473527669906616},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.32680895924568176},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3250119388103485},{"id":"https://openalex.org/C135572916","wikidata":"https://www.wikidata.org/wiki/Q193351","display_name":"Data warehouse","level":2,"score":0.2871728241443634},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.16912317276000977},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.145818829536438},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09527352452278137},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/is.2018.8710580","is_oa":false,"landing_page_url":"https://doi.org/10.1109/is.2018.8710580","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 International Conference on Intelligent Systems (IS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.41999998688697815,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2003542523","https://openalex.org/W2464862435","https://openalex.org/W2542385334","https://openalex.org/W2553629210","https://openalex.org/W6728731336"],"related_works":["https://openalex.org/W3110550032","https://openalex.org/W4385163218","https://openalex.org/W2379893251","https://openalex.org/W2121649921","https://openalex.org/W4254254849","https://openalex.org/W4248194256","https://openalex.org/W2195391366","https://openalex.org/W121864761","https://openalex.org/W2111829026","https://openalex.org/W2056085726"],"abstract_inverted_index":{"Business":[0],"Intelligence":[1],"(BI)":[2],"projects":[3],"allow":[4],"the":[5,26,58,80,88,97,101,121,131,139,171,180,185,189,193,200,203,221,228,246,249,254,262,289,302,309,313,316,324,329],"business":[6],"people":[7],"to":[8,54,79,99,157,191,225,244,268,275,291],"see":[9],"their":[10],"information":[11],"requirements":[12],"in":[13,90,170,242,308,315,328],"a":[14,63,116,159,206],"different":[15],"light":[16],"by":[17,66],"removing":[18,109],"dead":[19],"and/or":[20],"useless":[21],"data.":[22],"When":[23,142],"properly":[24],"implemented,":[25],"data":[27,38,59,78,107,127,129,149,194,212,290],"transformation":[28,60,122,165,168,230,250,325],"activities":[29],"of":[30,62,92,96,162,187,202,220,248,253,282,286,288,311],"cleansing,":[31],"summarization,":[32],"derivation,":[33],"aggregation,":[34],"and":[35,45,76,164,176,209,231,252,284,294,320],"integration":[36],"produce":[37],"that":[39,261],"is":[40,52,115,154,178,197,224,265],"clean,":[41],"condensed,":[42],"new,":[43],"complete,":[44],"standardized.":[46],"In":[47,199],"this":[48],"paper,":[49],"an":[50,85],"approach":[51,223,314],"presented":[53],"adding":[55],"logic":[56],"into":[57],"process":[61,173,322],"BI":[64,81,330],"project":[65],"using":[67],"Puzzle":[68,207],"method":[69],"standards.":[70],"The":[71,167,218,258,298],"ready":[72],"tools":[73],"for":[74,87,120,125,135,195,234,301],"cleansing":[75,104,163,213,232],"transforming":[77],"Target":[82],"database":[83],"provide":[84],"opportunity":[86,119],"user,":[89],"view":[91],"his":[93],"prior":[94],"knowledge":[95,287],"data,":[98,146,236],"choose":[100],"most":[102,181],"appropriate":[103,156,229],"techniques":[105,214,274],"(missing":[106],"fields,":[108],"redundancy,":[110],"spelling":[111],"errors,":[112,278],"etc.).":[113],"This":[114],"very":[117],"good":[118],"stage,":[123],"especially":[124],"known":[126,211],"(operational":[128],"from":[130,150,239],"company's":[132],"ERP":[133],"system,":[134],"example).":[136],"It":[137],"saves":[138],"processing":[140,143,263],"time.":[141],"not":[144],"well-known":[145],"such":[147,279],"as":[148,280],"social":[151,240],"networks,":[152,241],"it":[153],"more":[155],"apply":[158],"complete":[160],"set":[161],"techniques.":[166],"step":[169,183],"ETL":[172,256,303],"(Extract,":[174,318],"Transform":[175],"Load)":[177],"generally":[179],"time-consuming":[182],"where":[184,323],"possibility":[186],"shrinking":[188],"time":[190,264],"prepare":[192],"analysis":[196],"essential.":[198],"course":[201],"considered":[204],"study,":[205],"Method":[208],"well-":[210],"have":[215],"been":[216],"applied.":[217],"aim":[219],"proposed":[222],"help":[226],"select":[227],"steps":[233],"unknown":[235],"e.g.":[237],"coming":[238],"order":[243],"increase":[245],"speed":[247],"stage":[251],"whole":[255],"process.":[257,304],"results":[259,299],"show":[260],"reduced":[266],"due":[267],"faster":[269],"decision":[270],"making":[271],"on":[272],"which":[273],"apply.":[276],"Human":[277],"lack":[281,285],"experience":[283],"be":[292],"cleared":[293],"transformed,":[295],"are":[296,300],"diminishing.":[297],"Future":[305],"work":[306],"continues":[307],"direction":[310],"applying":[312],"ELT":[317],"Load":[319],"Transform)":[321],"takes":[326],"place":[327],"target":[331],"Database.":[332]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
