{"id":"https://openalex.org/W2960828776","doi":"https://doi.org/10.1145/3335484.3335495","title":"Intermediate Data Placement Strategy for Different Data Skew Levels Based on Random Sampling in Spark","display_name":"Intermediate Data Placement Strategy for Different Data Skew Levels Based on Random Sampling in Spark","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2960828776","doi":"https://doi.org/10.1145/3335484.3335495","mag":"2960828776"},"language":"en","primary_location":{"id":"doi:10.1145/3335484.3335495","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3335484.3335495","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 4th International Conference on Big Data and Computing  - ICBDC 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104110959","display_name":"Xueqian Gong","orcid":null},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xueqian Gong","raw_affiliation_strings":["School of Computer Science and Technology, Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100388485","display_name":"Chunlin Li","orcid":"https://orcid.org/0000-0001-8338-6065"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunlin Li","raw_affiliation_strings":["School of Computer Science and Technology, Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102141487","display_name":"Youlong Luo","orcid":"https://orcid.org/0009-0002-2313-0695"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Youlong Luo","raw_affiliation_strings":["School of Computer Science and Technology, Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5104110959"],"corresponding_institution_ids":["https://openalex.org/I196699116"],"apc_list":null,"apc_paid":null,"fwci":0.28,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.64896639,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"17","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/skew","display_name":"Skew","score":0.8541748523712158},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.796089768409729},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.780099093914032},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.6126595735549927},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5043743848800659},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4547847509384155},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4129672646522522},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4123101532459259},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.34407660365104675}],"concepts":[{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.8541748523712158},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.796089768409729},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.780099093914032},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.6126595735549927},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5043743848800659},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4547847509384155},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4129672646522522},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4123101532459259},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.34407660365104675},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3335484.3335495","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3335484.3335495","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 4th International Conference on Big Data and Computing  - ICBDC 2019","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2040842686","https://openalex.org/W2059348194","https://openalex.org/W2062187426","https://openalex.org/W2073459573","https://openalex.org/W2080131844","https://openalex.org/W2100830825","https://openalex.org/W2120755171","https://openalex.org/W2122370965","https://openalex.org/W2141267666","https://openalex.org/W2146591355","https://openalex.org/W2165769837","https://openalex.org/W2295769400","https://openalex.org/W2464834590","https://openalex.org/W2734430573","https://openalex.org/W2800195291","https://openalex.org/W2889219672","https://openalex.org/W4239429564"],"related_works":["https://openalex.org/W4290802965","https://openalex.org/W4390608645","https://openalex.org/W97789383","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2766461310","https://openalex.org/W4247566972","https://openalex.org/W4388692845","https://openalex.org/W3202731209","https://openalex.org/W3211874991"],"abstract_inverted_index":{"In":[0,40,137],"recent":[1],"years,":[2],"the":[3,16,23,30,37,52,64,80,84,98,117,135,143,150,154,158,162,170,178,190,193],"Apache":[4,31],"Spark":[5,32],"had":[6],"been":[7],"widely":[8],"used":[9],"in":[10,43,200,203],"processing":[11],"large-scale":[12],"data.":[13,39,86,165],"However,":[14],"when":[15,63],"input":[17,65,101,120,164],"data":[18,26,61,66,75,102,109,121,128,196],"onto":[19,67],"MapReduce":[20,68],"was":[21,57,69,103,122],"skewed,":[22],"default":[24],"intermediate":[25,85,195],"placement":[27,110,129,197],"algorithm":[28,92,111,130],"of":[29,83,100,119,153,161,174,182,192],"could":[33],"not":[34,133],"efficiently":[35,62],"handle":[36],"skewed":[38,163],"this":[41,201],"paper,":[42],"order":[44],"to":[45,59,78],"achieve":[46],"load":[47,179],"balancing":[48,180],"with":[49],"all":[50],"reducers,":[51],"focus":[53],"on":[54,94,113],"our":[55,138],"attentions":[56],"how":[58],"process":[60],"skewed.":[70],"So":[71],"we":[72,105,124,140,148,168],"defined":[73],"a":[74,90,107,126],"skew":[76,81,99,118],"model":[77],"measure":[79],"degree":[82,181],"We":[87],"also":[88],"applied":[89],"sampling":[91,145],"based":[93,112],"reservoir":[95],"model.":[96],"When":[97,116],"severe,":[104],"proposed":[106,125,199],"fine-grained":[108],"splitting":[114],"cluster.":[115],"slight,":[123],"coarse-grained":[127],"that":[131,156],"did":[132],"split":[134],"clusters.":[136],"experiment,":[139],"first":[141],"chose":[142],"appropriate":[144],"rate.":[146],"Then,":[147],"determined":[149],"optimal":[151],"value":[152],"parameter":[155],"measures":[157],"two":[159,194],"degrees":[160],"At":[166],"last,":[167],"compared":[169],"average":[171],"execution":[172],"time":[173],"several":[175],"algorithms":[176,198],"and":[177],"reducers":[183],"under":[184],"different":[185],"conditions.":[186],"The":[187],"experiments":[188],"confirmed":[189],"efficiency":[191],"paper":[202],"their":[204],"respective":[205],"usage":[206],"scenarios.":[207]},"counts_by_year":[{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
