{"id":"https://openalex.org/W2790776916","doi":"https://doi.org/10.1145/3171592.3171610","title":"A Comparative Study of Data Skew in Hadoop","display_name":"A Comparative Study of Data Skew in Hadoop","publication_year":2017,"publication_date":"2017-12-08","ids":{"openalex":"https://openalex.org/W2790776916","doi":"https://doi.org/10.1145/3171592.3171610","mag":"2790776916"},"language":"en","primary_location":{"id":"doi:10.1145/3171592.3171610","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3171592.3171610","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 VI International Conference on Network, Communication and Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049434039","display_name":"Majun He","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Majun He","raw_affiliation_strings":["School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101542700","display_name":"Guozhong Li","orcid":"https://orcid.org/0000-0002-1199-9222"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guozhong Li","raw_affiliation_strings":["School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037455403","display_name":"Chaojie Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaojie Huang","raw_affiliation_strings":["School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028764753","display_name":"Yufei Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yufei Ye","raw_affiliation_strings":["School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001104882","display_name":"Wenhong Tian","orcid":"https://orcid.org/0000-0002-5551-9796"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhong Tian","raw_affiliation_strings":["School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5049434039"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":1.4509,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.88070551,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/skew","display_name":"Skew","score":0.7469103336334229},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6613184213638306}],"concepts":[{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.7469103336334229},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6613184213638306},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3171592.3171610","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3171592.3171610","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 VI International Conference on Network, Communication and Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2003138918","https://openalex.org/W2017540033","https://openalex.org/W2064299501","https://openalex.org/W2067051372","https://openalex.org/W2080131844","https://openalex.org/W2090673017","https://openalex.org/W2091842814","https://openalex.org/W2140509629","https://openalex.org/W2141267666","https://openalex.org/W2156441202","https://openalex.org/W2173213060","https://openalex.org/W2396433687","https://openalex.org/W4213220468","https://openalex.org/W4255522523"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W4290802965","https://openalex.org/W97789383","https://openalex.org/W3087516072","https://openalex.org/W2727156679","https://openalex.org/W4289406402","https://openalex.org/W2364071303","https://openalex.org/W1483053255","https://openalex.org/W2896097814","https://openalex.org/W20221657"],"abstract_inverted_index":{"MapReduce":[0,23],"which":[1],"has":[2],"been":[3,20],"a":[4,119],"well-known":[5],"programming":[6,24],"model":[7,25],"processes":[8],"numerous":[9],"raw":[10],"data":[11,81],"in":[12,84,104],"large":[13],"scale":[14],"clusters.":[15],"However,":[16],"great":[17],"challenges":[18],"have":[19],"brought":[21],"to":[22,73],"while":[26],"routinely":[27],"handling":[28],"the":[29,34,38,42,47,78],"big":[30],"data.":[31],"To":[32],"mitigate":[33],"process":[35],"time":[36],"of":[37,46,80,106],"clusters":[39],"through":[40],"minimizing":[41],"makespan":[43],"is":[44,54],"one":[45],"key":[48],"challenges.":[49],"For":[50],"now,":[51],"(data)":[52],"skew":[53,82],"partly":[55],"responsible":[56],"for":[57],"that":[58],"and":[59,76,99,108,115],"there":[60],"are":[61,102,127],"some":[62],"methods":[63],"presented":[64],"by":[65],"research":[66,125],"teams":[67],"from":[68],"different":[69],"perspectives.":[70],"In":[71],"order":[72],"fully":[74],"understand":[75],"utilize":[77],"state-of-the-art":[79],"problem,":[83],"this":[85],"paper,":[86],"we":[87],"compare":[88],"six":[89],"algorithms:":[90],"Hadoop":[91],"default":[92],"(speculative":[93],"execution),":[94],"SkewReduce,":[95],"SkewTune,":[96],"iShuffle,":[97],"LEEN":[98],"LIBRA.":[100],"They":[101],"compared":[103],"terms":[105],"architecture":[107],"main":[109],"features,":[110],"core":[111],"algorithms,":[112],"performance":[113],"metrics":[114],"evaluation":[116],"methods.":[117],"Finally,":[118],"few":[120],"challenging":[121],"problems":[122],"as":[123],"future":[124],"trends":[126],"summarized.":[128]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
