{"id":"https://openalex.org/W4414909307","doi":"https://doi.org/10.1109/cluster59342.2025.11186487","title":"BMPipe: Bubble-Memory Co-Optimization Strategy Planner for Very-Large DNN Training","display_name":"BMPipe: Bubble-Memory Co-Optimization Strategy Planner for Very-Large DNN Training","publication_year":2025,"publication_date":"2025-09-02","ids":{"openalex":"https://openalex.org/W4414909307","doi":"https://doi.org/10.1109/cluster59342.2025.11186487"},"language":"en","primary_location":{"id":"doi:10.1109/cluster59342.2025.11186487","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cluster59342.2025.11186487","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Cluster Computing (CLUSTER)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053902996","display_name":"Ruiwen Wang","orcid":"https://orcid.org/0000-0002-0044-7289"},"institutions":[{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]},{"id":"https://openalex.org/I51101395","display_name":"Universit\u00e9 Paris 1 Panth\u00e9on-Sorbonne","ror":"https://ror.org/002t25c44","country_code":"FR","type":"education","lineage":["https://openalex.org/I51101395"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ruiwen Wang","raw_affiliation_strings":["Sorbonne University,Paris,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sorbonne University,Paris,France","institution_ids":["https://openalex.org/I39804081","https://openalex.org/I51101395"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100424953","display_name":"Chong Li","orcid":"https://orcid.org/0000-0003-1910-155X"},"institutions":[{"id":"https://openalex.org/I4210123571","display_name":"Huawei Technologies (France)","ror":"https://ror.org/02rbzf697","country_code":"FR","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210123571"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Chong Li","raw_affiliation_strings":["Huawei Technologies France SASU,Paris,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Technologies France SASU,Paris,France","institution_ids":["https://openalex.org/I4210123571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080257325","display_name":"Thibaut Tachon","orcid":"https://orcid.org/0000-0003-3264-5535"},"institutions":[{"id":"https://openalex.org/I4210123571","display_name":"Huawei Technologies (France)","ror":"https://ror.org/02rbzf697","country_code":"FR","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210123571"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Thibaut Tachon","raw_affiliation_strings":["Huawei Technologies France SASU,Paris,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Technologies France SASU,Paris,France","institution_ids":["https://openalex.org/I4210123571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019249875","display_name":"Raja Appuswamy","orcid":"https://orcid.org/0000-0001-5887-4091"},"institutions":[{"id":"https://openalex.org/I1902872","display_name":"EURECOM","ror":"https://ror.org/00sse7z02","country_code":"FR","type":"education","lineage":["https://openalex.org/I1902872","https://openalex.org/I205703379"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Raja Appuswamy","raw_affiliation_strings":["EURECOM,Biot,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"EURECOM,Biot,France","institution_ids":["https://openalex.org/I1902872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101812366","display_name":"Teng Su","orcid":"https://orcid.org/0009-0005-9517-2845"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Teng Su","raw_affiliation_strings":["Huawei Technologies Co., Ltd.,Hangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Technologies Co., Ltd.,Hangzhou,China","institution_ids":["https://openalex.org/I2250955327"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5991,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.73229479,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9230999946594238,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9230999946594238,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.900600016117096,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7764999866485596},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7114999890327454},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6820999979972839},{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.580299973487854},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.545199990272522},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.4016000032424927}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7914000153541565},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7764999866485596},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7114999890327454},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6820999979972839},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.580299973487854},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.545199990272522},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.489300012588501},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.4016000032424927},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3833000063896179},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.361299991607666},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3334999978542328},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.29120001196861267},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2838999927043915},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.28060001134872437},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.27300000190734863},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.25600001215934753}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/cluster59342.2025.11186487","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cluster59342.2025.11186487","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Cluster Computing (CLUSTER)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-05564425v1","is_oa":false,"landing_page_url":"https://hal.science/hal-05564425","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2025 IEEE International Conference on Cluster Computing (CLUSTER 2025), Sep 2025, Edimbourg, United Kingdom. &#x27E8;10.1109/CLUSTER59342.2025.11186487&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2906007643","https://openalex.org/W2969388332","https://openalex.org/W3132107458","https://openalex.org/W3138516171","https://openalex.org/W3204998121","https://openalex.org/W3206832494","https://openalex.org/W4327694855","https://openalex.org/W4386348101","https://openalex.org/W4395117922"],"related_works":[],"abstract_inverted_index":{"Pipeline":[0],"parallelism":[1],"and":[2,26,40,92,107,164],"activation":[3],"recomputation":[4,41],"are":[5],"widely":[6],"adopted":[7],"optimization":[8,48,63],"techniques,":[9],"among":[10],"others,":[11],"to":[12,32,66,147,154],"scale":[13,53],"DNN":[14,135],"training":[15],"on":[16,137],"large":[17],"accelerator":[18],"clusters.":[19],"However,":[20],"as":[21,168],"DNNs":[22],"grow":[23],"in":[24],"complexity":[25],"heterogeneity,":[27],"it":[28,166],"becomes":[29],"increasingly":[30],"difficult":[31],"determine":[33],"the":[34,155],"optimal":[35],"combination":[36],"of":[37,62,69],"pipeline":[38],"partitioning":[39],"strategies.":[42],"Existing":[43],"solutions":[44],"either":[45],"propose":[46],"manual":[47],"approaches":[49,56],"that":[50,57,82,103,113,143],"do":[51],"not":[52],"or":[54],"automated":[55],"explore":[58],"only":[59],"a":[60,78,110,125,138],"subset":[61],"possibilities":[64],"due":[65],"an":[67,118],"explosion":[68],"search":[70],"space.":[71],"In":[72,174],"this":[73],"paper,":[74],"we":[75,123,130],"present":[76],"BMPipe,":[77,122],"bubble-memory":[79],"co-optimization":[80],"planner":[81],"holistically":[83],"optimizes":[84],"computation":[85],"imbalance,":[86],"memory":[87],"under":[88],"utilization,":[89],"redundant":[90],"computation,":[91,105],"schedulinginduced":[93],"preparation":[94],"time.":[95],"At":[96],"its":[97],"core,":[98],"BMPipe":[99,144,176],"uses":[100],"symbolic":[101],"representations":[102],"unify":[104],"memory,":[106],"bubbles":[108],"into":[109],"single":[111],"model":[112],"is":[114],"solved":[115],"by":[116,181],"using":[117],"ILP-based":[119],"planner.":[120],"Using":[121],"perform":[124],"thorough":[126],"experimental":[127],"evaluation":[128],"where":[129],"train":[131],"several":[132],"large,":[133],"state-of-the-art":[134,156],"models":[136],"16K-NPU":[139],"cluster.":[140],"We":[141],"show":[142],"achieves":[145],"up":[146],"<tex":[148,169,182],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[149,170,183],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.36":[150],"\\times$</tex>":[151,172,187],"speedup":[152],"compared":[153,188],"solution":[157],"Megatron.":[158,190],"Against":[159],"automatic":[160],"planners":[161],"PipeDream,":[162],"Merak":[163],"AdaPipe,,":[165],"yields":[167],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.27":[171],"speed-up.":[173],"addition,":[175],"boosts":[177],"peak":[178],"device-memory":[179],"utilization":[180],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\mathbf{1.":[184],"4":[185],"2}":[186],"with":[189]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
