{"id":"https://openalex.org/W4410229517","doi":"https://doi.org/10.1109/wcnc61545.2025.10978332","title":"A Transformer-Block-Wise Collaborative Training Mechanism with Hybrid Parallelism Over Heterogeneous Networks","display_name":"A Transformer-Block-Wise Collaborative Training Mechanism with Hybrid Parallelism Over Heterogeneous Networks","publication_year":2025,"publication_date":"2025-03-24","ids":{"openalex":"https://openalex.org/W4410229517","doi":"https://doi.org/10.1109/wcnc61545.2025.10978332"},"language":"en","primary_location":{"id":"doi:10.1109/wcnc61545.2025.10978332","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wcnc61545.2025.10978332","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Wireless Communications and Networking Conference (WCNC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101575519","display_name":"Jiewei Chen","orcid":"https://orcid.org/0009-0001-5438-9934"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]},{"id":"https://openalex.org/I918919364","display_name":"Switch","ror":"https://ror.org/02yw51758","country_code":"CH","type":"nonprofit","lineage":["https://openalex.org/I918919364"]}],"countries":["CH","CN"],"is_corresponding":true,"raw_author_name":"Jiewei Chen","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,State Key Laboratory of Networking and Switching Technology"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,State Key Laboratory of Networking and Switching Technology","institution_ids":["https://openalex.org/I139759216","https://openalex.org/I918919364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101992293","display_name":"Jingrong Wang","orcid":"https://orcid.org/0000-0003-3392-7016"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]},{"id":"https://openalex.org/I918919364","display_name":"Switch","ror":"https://ror.org/02yw51758","country_code":"CH","type":"nonprofit","lineage":["https://openalex.org/I918919364"]}],"countries":["CH","CN"],"is_corresponding":false,"raw_author_name":"Jingrong Wang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,State Key Laboratory of Networking and Switching Technology"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,State Key Laboratory of Networking and Switching Technology","institution_ids":["https://openalex.org/I139759216","https://openalex.org/I918919364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047467022","display_name":"Shaoyong Guo","orcid":"https://orcid.org/0000-0003-2033-8431"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]},{"id":"https://openalex.org/I918919364","display_name":"Switch","ror":"https://ror.org/02yw51758","country_code":"CH","type":"nonprofit","lineage":["https://openalex.org/I918919364"]}],"countries":["CH","CN"],"is_corresponding":false,"raw_author_name":"Shaoyong Guo","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,State Key Laboratory of Networking and Switching Technology"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,State Key Laboratory of Networking and Switching Technology","institution_ids":["https://openalex.org/I139759216","https://openalex.org/I918919364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025633536","display_name":"Jiakai Hao","orcid":"https://orcid.org/0009-0002-0503-2194"},"institutions":[{"id":"https://openalex.org/I17442442","display_name":"State Grid Corporation of China (China)","ror":"https://ror.org/05twwhs70","country_code":"CN","type":"company","lineage":["https://openalex.org/I17442442"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiakai Hao","raw_affiliation_strings":["State Grid Beijing Information &#x0026; Communication Company"],"affiliations":[{"raw_affiliation_string":"State Grid Beijing Information &#x0026; Communication Company","institution_ids":["https://openalex.org/I17442442"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042944721","display_name":"Xuesong Qiu","orcid":"https://orcid.org/0000-0002-7899-539X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]},{"id":"https://openalex.org/I918919364","display_name":"Switch","ror":"https://ror.org/02yw51758","country_code":"CH","type":"nonprofit","lineage":["https://openalex.org/I918919364"]}],"countries":["CH","CN"],"is_corresponding":false,"raw_author_name":"Xuesong Qiu","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,State Key Laboratory of Networking and Switching Technology"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,State Key Laboratory of Networking and Switching Technology","institution_ids":["https://openalex.org/I139759216","https://openalex.org/I918919364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005327587","display_name":"Zehui Xiong","orcid":"https://orcid.org/0000-0002-4440-941X"},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Zehui Xiong","raw_affiliation_strings":["Pillar of Information Systems Technology and Design, Singapore University of Technology and Design"],"affiliations":[{"raw_affiliation_string":"Pillar of Information Systems Technology and Design, Singapore University of Technology and Design","institution_ids":["https://openalex.org/I152815399"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101575519"],"corresponding_institution_ids":["https://openalex.org/I139759216","https://openalex.org/I918919364"],"apc_list":null,"apc_paid":null,"fwci":2.8414,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.90665973,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"01","last_page":"06"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.6991000175476074,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.6991000175476074,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.6725999712944031,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.6421999931335449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7573282718658447},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5676602125167847},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5626494288444519},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.5333424210548401},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4911310374736786},{"id":"https://openalex.org/keywords/task-parallelism","display_name":"Task parallelism","score":0.48980140686035156},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.43331044912338257},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.4167410135269165},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3830254077911377},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.34093159437179565},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.09038844704627991},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07824215292930603},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.06494581699371338}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7573282718658447},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5676602125167847},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5626494288444519},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.5333424210548401},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4911310374736786},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.48980140686035156},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.43331044912338257},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.4167410135269165},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3830254077911377},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.34093159437179565},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.09038844704627991},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07824215292930603},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.06494581699371338},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wcnc61545.2025.10978332","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wcnc61545.2025.10978332","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Wireless Communications and Networking Conference (WCNC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2039799818","display_name":null,"funder_award_id":"62271072","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2896457183","https://openalex.org/W2991040477","https://openalex.org/W4226128266","https://openalex.org/W4316252350","https://openalex.org/W4319663761","https://openalex.org/W4319778953","https://openalex.org/W4379033976","https://openalex.org/W4390490761","https://openalex.org/W4390828296","https://openalex.org/W4396878072","https://openalex.org/W4399665748","https://openalex.org/W6739901393","https://openalex.org/W6778883912"],"related_works":["https://openalex.org/W2950520577","https://openalex.org/W1501159154","https://openalex.org/W2003935582","https://openalex.org/W74409296","https://openalex.org/W1554644772","https://openalex.org/W2494130044","https://openalex.org/W2468095077","https://openalex.org/W29548032","https://openalex.org/W2593878938","https://openalex.org/W305742777"],"abstract_inverted_index":{"With":[0],"the":[1,26,35,56,77,90,112,116,136,141,151,155,164,168],"rise":[2],"of":[3,16,29,38,79,157,166,171],"AI-Generated":[4],"Content":[5],"(AIGC)":[6],"services":[7],"in":[8,50],"wireless":[9],"networks,":[10],"efficient":[11],"and":[12,47,105,154,191],"high-quality":[13],"distributed":[14],"training":[15,78,173,184],"Large":[17],"Language":[18],"Models":[19],"(LLMs)":[20],"has":[21],"become":[22],"essential":[23],"for":[24],"enabling":[25,135],"large-scale":[27],"application":[28],"next":[30],"generation":[31],"AI":[32],"technologies.":[33],"However,":[34],"extensive":[36],"parameters":[37,104],"LLMs":[39],"impose":[40],"significant":[41],"demands":[42],"on":[43,132,185],"memory,":[44],"computing":[45],"power":[46],"communication":[48],"resources":[49],"heterogeneous":[51],"networks.":[52],"To":[53],"efficiently":[54],"utilize":[55],"dispersed":[57],"network":[58,160],"resources,":[59],"this":[60],"paper":[61],"presents":[62],"a":[63,70,84,127],"First-Pipeline-":[64],"Then-Federated":[65],"Learning":[66],"(FPTFL)":[67],"approach":[68,180],"with":[69,163],"hybrid":[71],"parallel":[72],"scheduling":[73],"strategy":[74],"to":[75,88,111,138,149],"facilitate":[76],"Transformer-based":[80,182],"LLMs.":[81],"We":[82],"propose":[83],"block-wise":[85],"splitting":[86],"mechanism":[87],"partition":[89],"Transformer's":[91],"encoder":[92,103],"into":[93],"distinct":[94],"segments,":[95],"which":[96],"are":[97,109],"deployed":[98],"cross":[99],"individual":[100],"devices.":[101],"The":[102],"intermediate":[106],"smashed":[107],"data":[108],"uploaded":[110],"edge":[113],"server,":[114],"where":[115],"whole":[117],"model":[118,183,189],"is":[119,147],"updated":[120],"through":[121],"federated":[122],"aggregation.":[123],"Particularly,":[124],"we":[125],"develop":[126],"fine-grained":[128],"computation-efficient":[129],"method":[130],"based":[131],"pipeline":[133],"parallelism,":[134],"segments":[137,153],"cooperatively":[139],"train":[140],"entire":[142],"encoder.":[143],"An":[144],"optimization":[145],"problem":[146],"formulated":[148],"determine":[150],"LLM":[152,172],"number":[156],"micro-batches":[158],"under":[159],"resource":[161],"constraints,":[162],"goal":[165],"minimizing":[167],"total":[169],"latency":[170],"services.":[174],"Simulation":[175],"results":[176],"demonstrate":[177],"that":[178],"our":[179],"enables":[181],"resource-constrained":[186],"devices,":[187],"preserves":[188],"performance,":[190],"reduces":[192],"waiting":[193],"time.":[194]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
