{"id":"https://openalex.org/W4292951128","doi":"https://doi.org/10.1142/s0129626422500050","title":"BaPipe: Balanced Pipeline Parallelism for DNN Training","display_name":"BaPipe: Balanced Pipeline Parallelism for DNN Training","publication_year":2022,"publication_date":"2022-08-19","ids":{"openalex":"https://openalex.org/W4292951128","doi":"https://doi.org/10.1142/s0129626422500050"},"language":"en","primary_location":{"id":"doi:10.1142/s0129626422500050","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0129626422500050","pdf_url":null,"source":{"id":"https://openalex.org/S18360026","display_name":"Parallel Processing Letters","issn_l":"0129-6264","issn":["0129-6264","1793-642X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Parallel Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102507634","display_name":"Letian Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Letian Zhao","raw_affiliation_strings":["Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China"],"affiliations":[{"raw_affiliation_string":"Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I126520041"]},{"raw_affiliation_string":"State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101888899","display_name":"Rui Xu","orcid":"https://orcid.org/0000-0002-5549-236X"},"institutions":[{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Xu","raw_affiliation_strings":["Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China"],"affiliations":[{"raw_affiliation_string":"Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I126520041"]},{"raw_affiliation_string":"State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100734913","display_name":"Tianqi Wang","orcid":"https://orcid.org/0000-0002-5921-6565"},"institutions":[{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianqi Wang","raw_affiliation_strings":["Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China"],"affiliations":[{"raw_affiliation_string":"Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I126520041"]},{"raw_affiliation_string":"State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017010290","display_name":"Teng Tian","orcid":"https://orcid.org/0000-0002-0594-5957"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Teng Tian","raw_affiliation_strings":["Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China"],"affiliations":[{"raw_affiliation_string":"Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I126520041"]},{"raw_affiliation_string":"State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100395727","display_name":"Xiaotian Wang","orcid":"https://orcid.org/0000-0002-6970-2997"},"institutions":[{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaotian Wang","raw_affiliation_strings":["Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China"],"affiliations":[{"raw_affiliation_string":"Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I126520041"]},{"raw_affiliation_string":"State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027254893","display_name":"Wei Wu","orcid":"https://orcid.org/0000-0002-2750-6365"},"institutions":[{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Wu","raw_affiliation_strings":["Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China"],"affiliations":[{"raw_affiliation_string":"Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I126520041"]},{"raw_affiliation_string":"State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062473941","display_name":"Chio-in Ieong","orcid":null},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chio-In Ieong","raw_affiliation_strings":["Huawei Technologies, Shenzhen 518129, China"],"affiliations":[{"raw_affiliation_string":"Huawei Technologies, Shenzhen 518129, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079471306","display_name":"Xi Jin","orcid":"https://orcid.org/0000-0002-4159-2925"},"institutions":[{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xi Jin","raw_affiliation_strings":["Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China"],"affiliations":[{"raw_affiliation_string":"Institute of Microelectronics, Department of Physics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I126520041"]},{"raw_affiliation_string":"State Key Laboratory of Particle Detection and Electronics, University of Science and Technology of China, Hefei 230026, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5079471306"],"corresponding_institution_ids":["https://openalex.org/I126520041","https://openalex.org/I4210119392"],"apc_list":null,"apc_paid":null,"fwci":0.7043,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.70074543,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"32","issue":"03n04","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8758664131164551},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.8004183769226074},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7323427200317383},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6721969842910767},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.6489381790161133},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6064242124557495},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5279273986816406},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.48437318205833435},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.44888415932655334},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.324370414018631},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24674654006958008},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.22327566146850586},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.06676846742630005}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8758664131164551},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.8004183769226074},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7323427200317383},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6721969842910767},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.6489381790161133},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6064242124557495},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5279273986816406},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.48437318205833435},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.44888415932655334},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.324370414018631},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24674654006958008},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.22327566146850586},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.06676846742630005},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s0129626422500050","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0129626422500050","pdf_url":null,"source":{"id":"https://openalex.org/S18360026","display_name":"Parallel Processing Letters","issn_l":"0129-6264","issn":["0129-6264","1793-642X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Parallel Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6741947268","display_name":null,"funder_award_id":"HO2018085418","funder_id":"https://openalex.org/F4320322183","funder_display_name":"Huawei Technologies"}],"funders":[{"id":"https://openalex.org/F4320322183","display_name":"Huawei Technologies","ror":"https://ror.org/00cmhce21"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2124446952","https://openalex.org/W2155893237","https://openalex.org/W2194775991","https://openalex.org/W2302255633","https://openalex.org/W2612690371","https://openalex.org/W2618530766","https://openalex.org/W2626991402","https://openalex.org/W2734941459","https://openalex.org/W2807778630","https://openalex.org/W2890068895","https://openalex.org/W2904445802","https://openalex.org/W2950592884","https://openalex.org/W2969388332","https://openalex.org/W2969766737","https://openalex.org/W2972087877","https://openalex.org/W2973058717","https://openalex.org/W2982408761","https://openalex.org/W3004495293","https://openalex.org/W4212774754"],"related_works":["https://openalex.org/W2950520577","https://openalex.org/W1501159154","https://openalex.org/W1554644772","https://openalex.org/W2003935582","https://openalex.org/W2494130044","https://openalex.org/W2033862586","https://openalex.org/W3170887803","https://openalex.org/W2963831937","https://openalex.org/W74409296","https://openalex.org/W3209384898"],"abstract_inverted_index":{"The":[0],"size":[1],"of":[2,12,33,90,103,115,125,148,177,196,204],"deep":[3,19],"neural":[4],"networks":[5,91],"(DNNs)":[6],"grows":[7],"rapidly":[8],"as":[9,106,108,160],"the":[10,13,31,81,87,101,109,121,146,174,182],"complexity":[11],"machine":[14],"learning":[15,20],"algorithm":[16],"increases.":[17],"Distributed":[18],"based":[21],"on":[22,72,169,207],"model":[23],"parallelism":[24,51,64,97,187],"has":[25],"been":[26],"widely":[27],"used":[28],"to":[29,37,61,92,140],"satisfy":[30],"requirements":[32],"DNN":[34,70,104],"training":[35,47,71],"related":[36],"computation":[38],"and":[39,65,83,112,153,163,172,189,200,210],"memory.":[40],"In":[41,76],"this":[42],"paper,":[43],"we":[44],"propose":[45],"a":[46,137,194],"framework":[48],"for":[49,69,86,185],"pipeline":[50,63,96,126,190],"called":[52],"BaPipe":[53,118,192],"(Balanced":[54],"Pipeline)":[55],"that":[56],"can":[57],"automatically":[58,119,141],"explore":[59],"methods":[60],"schedule":[62],"balanced":[66],"partition":[67,89],"strategies":[68],"heterogeneous":[73,211],"accelerator":[74,79],"clusters.":[75,179],"BaPipe,":[77],"each":[78,116],"calculates":[80],"forward":[82],"backward":[84],"propagation":[85],"assigned":[88],"implement":[93],"an":[94],"intra-batch":[95],"strategy.":[98],"By":[99],"considering":[100],"parameters":[102],"models":[105],"well":[107],"computation,":[110],"memory,":[111],"communication":[113],"resources":[114],"accelerator,":[117],"selects":[120],"most":[122],"suitable":[123],"method":[124],"scheduling":[127,132],"from":[128],"among":[129],"multiple":[130],"proposed":[131],"modes.":[133],"It":[134],"also":[135],"uses":[136],"novel":[138],"strategy":[139],"investigate":[142],"load":[143],"balancing":[144],"in":[145],"context":[147],"inter-layer":[149],"partition,":[150,152],"intra-layer":[151],"coarse-grained":[154],"partition.":[155],"We":[156],"trained":[157],"such":[158],"DNNs":[159],"VGG-16,":[161],"ResNet-50,":[162],"Google\u2019s":[164],"Neural":[165],"Machine":[166],"Translation":[167],"(GNMT)":[168],"GPU":[170],"clusters,":[171],"simulated":[173],"training-related":[175],"performance":[176],"FPGA":[178],"Compared":[180],"with":[181],"state-of-the-art":[183],"frameworks":[184],"data":[186],"(DP)":[188],"parallelism,":[191],"provides":[193],"speedup":[195],"[Formula:":[197,201],"see":[198,202],"text]":[199,203],"memory":[205],"reduction":[206],"various":[208],"homogeneous":[209],"platforms.":[212]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
