{"id":"https://openalex.org/W4404181247","doi":"https://doi.org/10.14778/3685800.3685804","title":"Towards Resource Efficiency: Practical Insights into Large-Scale Spark Workloads at ByteDance","display_name":"Towards Resource Efficiency: Practical Insights into Large-Scale Spark Workloads at ByteDance","publication_year":2024,"publication_date":"2024-08-01","ids":{"openalex":"https://openalex.org/W4404181247","doi":"https://doi.org/10.14778/3685800.3685804"},"language":"en","primary_location":{"id":"doi:10.14778/3685800.3685804","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3685800.3685804","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104314960","display_name":"Yixin Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yixin Wu","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035676193","display_name":"Xiuqi Huang","orcid":"https://orcid.org/0009-0004-4443-1555"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiuqi Huang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029455241","display_name":"Zhilong Wei","orcid":"https://orcid.org/0000-0002-5880-287X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhongjia Wei","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089664772","display_name":"Hang Cheng","orcid":"https://orcid.org/0000-0003-2443-2820"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hang Cheng","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064912730","display_name":"Canjie Xin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chaohui Xin","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069104472","display_name":"Zuzhi Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zuzhi Chen","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100687667","display_name":"Binbin Chen","orcid":"https://orcid.org/0000-0001-7533-0686"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Binbin Chen","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035870404","display_name":"Yu\u2010Fei Wu","orcid":"https://orcid.org/0000-0002-3970-3999"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yufei Wu","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116592778","display_name":"Hao Wang","orcid":"https://orcid.org/0000-0003-2227-075X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Wang","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101487889","display_name":"Tieying Zhang","orcid":"https://orcid.org/0009-0003-2250-5528"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tieying Zhang","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048460408","display_name":"Rui Shi","orcid":"https://orcid.org/0009-0001-6471-6759"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rui Shi","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019439900","display_name":"Xiaofeng Gao","orcid":"https://orcid.org/0000-0003-1776-8799"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofeng Gao","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100515276","display_name":"Yuming Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuming Liang","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112302242","display_name":"Pengwei Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pengwei Zhao","raw_affiliation_strings":["ByteDance Inc"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100428808","display_name":"Guihai Chen","orcid":"https://orcid.org/0000-0002-6934-1685"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guihai Chen","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":15,"corresponding_author_ids":["https://openalex.org/A5104314960"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.871,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.95544067,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"17","issue":"12","first_page":"3759","last_page":"3771"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.793599545955658},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5751312971115112},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.5569034814834595},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5042613744735718},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.34012550115585327},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08459147810935974}],"concepts":[{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.793599545955658},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5751312971115112},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.5569034814834595},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5042613744735718},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.34012550115585327},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08459147810935974},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3685800.3685804","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3685800.3685804","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.6800000071525574,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2076164405","https://openalex.org/W2290712622","https://openalex.org/W2465838301","https://openalex.org/W2488300951","https://openalex.org/W2735420144","https://openalex.org/W2760770811","https://openalex.org/W2798457757","https://openalex.org/W2966185412","https://openalex.org/W2982167413","https://openalex.org/W3022630129","https://openalex.org/W3084687783","https://openalex.org/W3097528317","https://openalex.org/W3098844916","https://openalex.org/W3105441357","https://openalex.org/W3113331478","https://openalex.org/W3170664780","https://openalex.org/W3174969457","https://openalex.org/W3175195469","https://openalex.org/W3197025661","https://openalex.org/W3214121833","https://openalex.org/W3216235925","https://openalex.org/W4285004820","https://openalex.org/W4285327480","https://openalex.org/W4288723499","https://openalex.org/W4289534008","https://openalex.org/W4296437959","https://openalex.org/W4381614296","https://openalex.org/W4385568101","https://openalex.org/W4386528681","https://openalex.org/W4389664650"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W1975949872","https://openalex.org/W3159871278","https://openalex.org/W2230552005","https://openalex.org/W2905242764","https://openalex.org/W3109411864","https://openalex.org/W3017846737","https://openalex.org/W2390279801"],"abstract_inverted_index":{"At":[0],"ByteDance,":[1],"where":[2,116],"we":[3,43,87,99,196],"execute":[4],"over":[5,191,199],"a":[6,89,119,164],"million":[7],"Spark":[8,95,133,177,187],"jobs":[9,188],"and":[10,65,74,111,150,171,208,221],"handle":[11],"500PB":[12],"of":[13,28,186,218,223],"shuffled":[14],"data":[15],"daily,":[16],"ensuring":[17],"resource":[18,29,52,70,90,140],"efficiency":[19,30,53,91,125],"is":[20],"paramount":[21],"for":[22,94,138],"cost":[23],"savings.":[24],"However,":[25],"achieving":[26],"optimization":[27],"in":[31,54,80,189],"large-scale":[32],"production":[33,56,190],"environments":[34],"poses":[35],"significant":[36],"challenges.":[37],"Drawing":[38],"from":[39],"our":[40],"practical":[41],"experiences,":[42],"have":[44,197],"identified":[45],"three":[46],"key":[47],"issues":[48],"critical":[49],"to":[50,62,122],"addressing":[51],"real-world":[55],"settings:":[57],"1":[58,98],"slow":[59],"I/Os":[60],"leading":[61],"excessive":[63],"CPU":[64,201,219],"memory":[66,205,224],"idleness,":[67],"2":[68,129],"coarse-grained":[69],"control":[71,141],"causing":[72],"wastage,":[73],"3":[75,161],"sub-optimal":[76],"job":[77],"configurations":[78],"resulting":[79],"low":[81],"utilization.":[82],"To":[83],"tackle":[84],"these":[85,182],"issues,":[86],"propose":[88],"governance":[92],"framework":[93],"workloads.":[96],"Specifically,":[97],"devise":[100],"the":[101,132,192],"multi-mechanism":[102],"shuffle":[103,210],"services,":[104],"including":[105],"Enhanced":[106],"External":[107],"Shuffle":[108,113],"Service":[109,114],"(ESS)":[110],"Cloud":[112],"(CSS),":[115],"CSS":[117],"employs":[118],"push-based":[120],"approach":[121],"enhance":[123],"I/O":[124],"through":[126],"sequential":[127],"reading.":[128],"We":[130,162],"modify":[131],"configuration":[134,166,178],"parameter":[135],"protocol,":[136],"allowing":[137],"fine-grained":[139],"by":[142],"introducing":[143],"several":[144],"new":[145],"parameters":[146],"such":[147],"as":[148,152,154],"milliCores":[149],"memoryBurst,":[151],"well":[153],"supporting":[155],"operators":[156],"with":[157],"additional":[158],"spill":[159],"modes.":[160],"design":[163],"two-stage":[165],"autotuning":[167],"method,":[168],"comprising":[169],"rule-based":[170],"algorithm-based":[172],"tuning,":[173],"providing":[174],"more":[175],"reliable":[176],"optimizations.":[179],"By":[180],"deploying":[181],"techniques":[183],"on":[184],"millions":[185,217],"last":[193],"two":[194],"years,":[195],"achieved":[198],"22%":[200],"utilization":[202,206],"increase,":[203,207],"5%":[204],"10%":[209],"block":[211],"time":[212],"ratio":[213],"decrease,":[214],"effectively":[215],"saving":[216],"cores":[220],"petabytes":[222],"daily.":[225]},"counts_by_year":[{"year":2025,"cited_by_count":6}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
