{"id":"https://openalex.org/W4327781346","doi":"https://doi.org/10.1109/sds57574.2022.10062900","title":"A Simple Approach for Data Cleansing on Hadoop Framework using File Merging Technique","display_name":"A Simple Approach for Data Cleansing on Hadoop Framework using File Merging Technique","publication_year":2022,"publication_date":"2022-12-12","ids":{"openalex":"https://openalex.org/W4327781346","doi":"https://doi.org/10.1109/sds57574.2022.10062900"},"language":"en","primary_location":{"id":"doi:10.1109/sds57574.2022.10062900","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sds57574.2022.10062900","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 Ninth International Conference on Software Defined Systems (SDS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069326900","display_name":"Adnan Ali","orcid":"https://orcid.org/0000-0001-8776-0982"},"institutions":[{"id":"https://openalex.org/I161913731","display_name":"Al Ain University","ror":"https://ror.org/023abrt21","country_code":"AE","type":"education","lineage":["https://openalex.org/I161913731"]}],"countries":["AE"],"is_corresponding":true,"raw_author_name":"Adnan Ali","raw_affiliation_strings":["Information and Technology Center, Al-Ain University,Al Ain,UAE","Information and Technology Center, Al-Ain University, Al Ain, UAE"],"affiliations":[{"raw_affiliation_string":"Information and Technology Center, Al-Ain University,Al Ain,UAE","institution_ids":["https://openalex.org/I161913731"]},{"raw_affiliation_string":"Information and Technology Center, Al-Ain University, Al Ain, UAE","institution_ids":["https://openalex.org/I161913731"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044007513","display_name":"Nada Masood Mirza","orcid":"https://orcid.org/0000-0002-4410-7657"},"institutions":[{"id":"https://openalex.org/I201726411","display_name":"United Arab Emirates University","ror":"https://ror.org/01km6p862","country_code":"AE","type":"education","lineage":["https://openalex.org/I201726411"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Nada Masood Mirza","raw_affiliation_strings":["University College, United Arab Emirates University,Al Ain,UAE","University College, United Arab Emirates University, Al Ain, UAE"],"affiliations":[{"raw_affiliation_string":"University College, United Arab Emirates University,Al Ain,UAE","institution_ids":["https://openalex.org/I201726411"]},{"raw_affiliation_string":"University College, United Arab Emirates University, Al Ain, UAE","institution_ids":["https://openalex.org/I201726411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014788255","display_name":"Rawad Bader","orcid":null},"institutions":[{"id":"https://openalex.org/I161913731","display_name":"Al Ain University","ror":"https://ror.org/023abrt21","country_code":"AE","type":"education","lineage":["https://openalex.org/I161913731"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Rawad Bader","raw_affiliation_strings":["Information and Technology Center, Al-Ain University,Al Ain,UAE","Information and Technology Center, Al-Ain University, Al Ain, UAE"],"affiliations":[{"raw_affiliation_string":"Information and Technology Center, Al-Ain University,Al Ain,UAE","institution_ids":["https://openalex.org/I161913731"]},{"raw_affiliation_string":"Information and Technology Center, Al-Ain University, Al Ain, UAE","institution_ids":["https://openalex.org/I161913731"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085584754","display_name":"Mohamad Khairi Ishak","orcid":"https://orcid.org/0000-0002-3554-0061"},"institutions":[{"id":"https://openalex.org/I139322472","display_name":"Universiti Sains Malaysia","ror":"https://ror.org/02rgb2k63","country_code":"MY","type":"education","lineage":["https://openalex.org/I139322472"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Mohamad Khairi Ishak","raw_affiliation_strings":["School of Electrical &#x0026; Electronic Engineering, Universiti Sains Malaysia,Penang,Malaysia"],"affiliations":[{"raw_affiliation_string":"School of Electrical &#x0026; Electronic Engineering, Universiti Sains Malaysia,Penang,Malaysia","institution_ids":["https://openalex.org/I139322472"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5069326900"],"corresponding_institution_ids":["https://openalex.org/I161913731"],"apc_list":null,"apc_paid":null,"fwci":0.3031,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.66805128,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8656458258628845},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6126182079315186},{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.6043204665184021},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.5201521515846252},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.5197410583496094},{"id":"https://openalex.org/keywords/distributed-file-system","display_name":"Distributed File System","score":0.47946178913116455},{"id":"https://openalex.org/keywords/file-system","display_name":"File system","score":0.42885416746139526},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.39109861850738525},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.368390828371048},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.30423566699028015},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.08646360039710999}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8656458258628845},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6126182079315186},{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.6043204665184021},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5201521515846252},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.5197410583496094},{"id":"https://openalex.org/C152043487","wikidata":"https://www.wikidata.org/wiki/Q1229600","display_name":"Distributed File System","level":2,"score":0.47946178913116455},{"id":"https://openalex.org/C2780940931","wikidata":"https://www.wikidata.org/wiki/Q174989","display_name":"File system","level":2,"score":0.42885416746139526},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.39109861850738525},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.368390828371048},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.30423566699028015},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.08646360039710999},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sds57574.2022.10062900","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sds57574.2022.10062900","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 Ninth International Conference on Software Defined Systems (SDS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1034796090","https://openalex.org/W1579798859","https://openalex.org/W2064766209","https://openalex.org/W2131576956","https://openalex.org/W2151763080","https://openalex.org/W2158957441","https://openalex.org/W2165078986","https://openalex.org/W2342504379","https://openalex.org/W2405680777","https://openalex.org/W2748073839","https://openalex.org/W2761062406","https://openalex.org/W2885073745","https://openalex.org/W2888594961","https://openalex.org/W2888784313","https://openalex.org/W2889740674","https://openalex.org/W2919173667","https://openalex.org/W3112375919","https://openalex.org/W3199381283","https://openalex.org/W4213251304","https://openalex.org/W6703957577"],"related_works":["https://openalex.org/W753420207","https://openalex.org/W1480425691","https://openalex.org/W3042976586","https://openalex.org/W2353896575","https://openalex.org/W1550839482","https://openalex.org/W2379000728","https://openalex.org/W2368354671","https://openalex.org/W2389631998","https://openalex.org/W2362483455","https://openalex.org/W2355113374"],"abstract_inverted_index":{"Hadoop":[0,149],"framework":[1,25,138,150],"is":[2,26,53,141],"known":[3],"for":[4],"being":[5],"top-notch":[6],"in":[7,17,59,136],"processing":[8,41],"these":[9,47],"huge":[10,114],"files":[11,34,49,69,115],"and":[12,28,43,70,80,88],"providing":[13],"useful":[14],"data.":[15,118],"Unfortunately,":[16],"a":[18,76,112],"scenario":[19],"with":[20,116],"many":[21,36],"small":[22,33,48,68],"files,":[23],"the":[24,39,60,99,108,123,129,137,147],"inefficient":[27],"fails":[29],"to":[30],"deliver.":[31],"These":[32],"cause":[35],"issues":[37],"when":[38],"framework's":[40,100,130],"criteria":[42],"performance":[44,101,131],"levels.":[45],"Moreover,":[46],"contain":[50],"content":[51],"that":[52,90,122],"useless":[54],"or":[55],"provides":[56],"no":[57],"benefit":[58],"key-value":[61],"decision-making.":[62],"To":[63],"overcome":[64],"this":[65,73],"issue":[66],"of":[67],"unnecessary":[71],"content,":[72],"paper":[74],"proposes":[75],"simple":[77],"data":[78],"cleansing":[79],"file":[81],"merging":[82],"approach":[83],"based":[84],"on":[85],"specific":[86],"type":[87],"size":[89],"will":[91,97,110],"not":[92,126],"only":[93,127],"be":[94,111],"effective":[95],"but":[96,132],"increase":[98],"by":[102],"approx.":[103],"68%.":[104],"This":[105],"algorithm":[106],"ensures":[107],"output":[109],"few":[113],"essential/important":[117],"The":[119],"results":[120],"show":[121],"proposed":[124],"system":[125],"improves":[128],"also":[133],"reduces":[134],"deadlocks":[135],"processes,":[139],"which":[140],"approximately":[142],"68":[143],"%":[144],"improvement":[145],"over":[146],"base":[148],"processing.":[151]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
