{"id":"https://openalex.org/W4411486315","doi":"https://doi.org/10.1145/3695053.3731025","title":"Chimera: Communication Fusion for Hybrid Parallelism in Large Language Models","display_name":"Chimera: Communication Fusion for Hybrid Parallelism in Large Language Models","publication_year":2025,"publication_date":"2025-06-20","ids":{"openalex":"https://openalex.org/W4411486315","doi":"https://doi.org/10.1145/3695053.3731025"},"language":"en","primary_location":{"id":"doi:10.1145/3695053.3731025","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731025","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731025","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731025","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006484983","display_name":"Le Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Le Qin","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113173882","display_name":"Junwei Cui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junwei Cui","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090650251","display_name":"Weilin Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weilin Cai","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040392661","display_name":"Jiayi Huang","orcid":"https://orcid.org/0000-0003-4011-6668"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiayi Huang","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5006484983"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7145,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.845234,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"498","last_page":"513"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12029","display_name":"DNA and Biological Computing","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12029","display_name":"DNA and Biological Computing","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13062","display_name":"Cognitive Computing and Networks","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7204383611679077},{"id":"https://openalex.org/keywords/chimera","display_name":"Chimera (genetics)","score":0.6548765897750854},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5437402129173279},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.48905929923057556},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.47821733355522156},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.34131017327308655},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09385809302330017},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.07206699252128601}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7204383611679077},{"id":"https://openalex.org/C30278631","wikidata":"https://www.wikidata.org/wiki/Q281721","display_name":"Chimera (genetics)","level":3,"score":0.6548765897750854},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5437402129173279},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.48905929923057556},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.47821733355522156},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.34131017327308655},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09385809302330017},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.07206699252128601},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3695053.3731025","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731025","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731025","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3695053.3731025","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731025","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731025","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1477544716","display_name":null,"funder_award_id":"Guangdong","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4020255992","display_name":null,"funder_award_id":"Project","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5401505394","display_name":null,"funder_award_id":"62402411","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7356250002","display_name":null,"funder_award_id":"2024YFB4505800","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323537","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411486315.pdf","grobid_xml":"https://content.openalex.org/works/W4411486315.grobid-xml"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W1997674404","https://openalex.org/W2114853176","https://openalex.org/W2118231264","https://openalex.org/W2131613942","https://openalex.org/W2170901118","https://openalex.org/W3016395792","https://openalex.org/W3036878841","https://openalex.org/W3081168214","https://openalex.org/W3097528158","https://openalex.org/W3188065709","https://openalex.org/W3190806564","https://openalex.org/W3193985311","https://openalex.org/W3204998121","https://openalex.org/W3208319482","https://openalex.org/W4220694664","https://openalex.org/W4220967350","https://openalex.org/W4225108562","https://openalex.org/W4281790033","https://openalex.org/W4297097318","https://openalex.org/W4297097426","https://openalex.org/W4360831831","https://openalex.org/W4376652719","https://openalex.org/W4380874786","https://openalex.org/W4385571616","https://openalex.org/W4393406935","https://openalex.org/W4394998532","https://openalex.org/W4395117348","https://openalex.org/W4399452094","https://openalex.org/W4401211704","https://openalex.org/W4404955085","https://openalex.org/W4405756071"],"related_works":["https://openalex.org/W2064439451","https://openalex.org/W2897519367","https://openalex.org/W2034828171","https://openalex.org/W4391974846","https://openalex.org/W2887889047","https://openalex.org/W2583248760","https://openalex.org/W4316500148","https://openalex.org/W1974642509","https://openalex.org/W1595672120","https://openalex.org/W4230999561"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs),":[3],"exemplified":[4],"by":[5,120],"ChatGPT,":[6],"have":[7],"emerged":[8],"as":[9],"a":[10,38,51,63,91],"predominant":[11],"workload":[12],"in":[13,98,114,140],"current":[14],"machine":[15],"learning":[16],"systems.To":[17],"achieve":[18],"efficient":[19,83],"training":[20],"and":[21,84,117,130,159],"inference":[22],"within":[23,48],"the":[24,67,70,77,102,111,152],"constraints":[25],"of":[26,54,79,105,155],"limited":[27],"single-NPU":[28],"memory":[29],"capacity,":[30],"deploying":[31],"LLMs":[32,49],"on":[33,66,162,171],"multi-NPU":[34,68,80,165],"systems":[35,81,166],"typically":[36],"adopt":[37],"hybrid":[39,46,96,115,141],"approach":[40],"that":[41,146],"combines":[42],"various":[43],"parallelism":[44,47,97,108,116,126],"patterns.This":[45],"introduces":[50,89],"significant":[52],"amount":[53],"diverse":[55],"collective":[56],"communications.However,":[57],"these":[58],"frequent":[59],"blocking":[60],"communications":[61],"impose":[62],"substantial":[64],"burden":[65],"systems.Overcoming":[69],"communication":[71,92,103,112,123,133,138,177],"bottleneck":[72,139],"is":[73],"crucial":[74],"to":[75],"unlocking":[76],"potential":[78],"for":[82,95],"scalable":[85],"LLM":[86,107,142,156],"processing.This":[87],"paper":[88],"Chimera,":[90],"fusion":[93],"mechanism":[94],"LLMs.We":[99],"comprehensively":[100],"analyze":[101],"processes":[104],"each":[106],"pattern,":[109],"identify":[110],"redundancy":[113,119],"eliminate":[118],"fusing":[121],"adjacent":[122],"operators":[124],"during":[125],"transformation.By":[127],"reordering":[128],"operations":[129],"generating":[131],"redundancy-free":[132],"operator,":[134],"Chimera":[135,147],"effectively":[136],"mitigates":[137],"parallelism.Our":[143],"results":[144],"show":[145],"achieves":[148,167],"1.23-7.06network":[149],"bandwidth":[150],"speedup.Additionally,":[151],"end-to-end":[153],"performance":[154],"forward":[157],"pass":[158,161],"backward":[160],"different":[163],"typical":[164],"respective":[168],"1.32-1.58and":[169],"1.16-1.36speedups":[170],"average":[172],"compared":[173],"with":[174],"those":[175],"without":[176],"fusion.":[178]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
