{"id":"https://openalex.org/W4416429983","doi":"https://doi.org/10.1109/hoti66940.2025.00025","title":"Characterizing Communication Patterns in Distributed Large Language Model Inference","display_name":"Characterizing Communication Patterns in Distributed Large Language Model Inference","publication_year":2025,"publication_date":"2025-08-20","ids":{"openalex":"https://openalex.org/W4416429983","doi":"https://doi.org/10.1109/hoti66940.2025.00025"},"language":null,"primary_location":{"id":"doi:10.1109/hoti66940.2025.00025","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hoti66940.2025.00025","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Symposium on High-Performance Interconnects (HOTI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101847978","display_name":"Lang Xu","orcid":"https://orcid.org/0009-0000-6307-4566"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Lang Xu","raw_affiliation_strings":["The Ohio State University,Dept. Computer Science &#x0026; Engineering,Columbus,OH,United States"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Dept. Computer Science &#x0026; Engineering,Columbus,OH,United States","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055205226","display_name":"Kaushik Kandadi Suresh","orcid":"https://orcid.org/0000-0002-3705-2387"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kaushik Kandadi Suresh","raw_affiliation_strings":["The Ohio State University,Dept. Computer Science &#x0026; Engineering,Columbus,OH,United States"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Dept. Computer Science &#x0026; Engineering,Columbus,OH,United States","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015153122","display_name":"Quentin Anthony","orcid":"https://orcid.org/0000-0002-6823-9080"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Quentin Anthony","raw_affiliation_strings":["The Ohio State University,Dept. Computer Science &#x0026; Engineering,Columbus,OH,United States"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Dept. Computer Science &#x0026; Engineering,Columbus,OH,United States","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061967476","display_name":"Nawras Alnaasan","orcid":"https://orcid.org/0000-0002-3638-4144"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nawras Alnaasan","raw_affiliation_strings":["The Ohio State University,Dept. Computer Science &#x0026; Engineering,Columbus,OH,United States"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Dept. Computer Science &#x0026; Engineering,Columbus,OH,United States","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024879682","display_name":"Dhabaleswar K. Panda","orcid":"https://orcid.org/0000-0002-0356-1781"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhabaleswar K. Panda","raw_affiliation_strings":["The Ohio State University,Dept. Computer Science &#x0026; Engineering,Columbus,OH,United States"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Dept. Computer Science &#x0026; Engineering,Columbus,OH,United States","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101847978"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.48237861,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.17910000681877136,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.17910000681877136,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.14020000398159027,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.08630000054836273,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6708999872207642},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.527899980545044},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4722999930381775},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4140999913215637},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.3659999966621399},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.3580000102519989},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.35040000081062317},{"id":"https://openalex.org/keywords/telecommunications-network","display_name":"Telecommunications network","score":0.33869999647140503}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8486999869346619},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6708999872207642},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5329999923706055},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.527899980545044},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4722999930381775},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4140999913215637},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3580000102519989},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.35040000081062317},{"id":"https://openalex.org/C192126672","wikidata":"https://www.wikidata.org/wiki/Q1068715","display_name":"Telecommunications network","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30070000886917114},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.299699991941452},{"id":"https://openalex.org/C158156997","wikidata":"https://www.wikidata.org/wiki/Q1416645","display_name":"Models of communication","level":2,"score":0.29840001463890076},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2957000136375427},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.29170000553131104},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.29010000824928284},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.2840000092983246},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27549999952316284},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.2637999951839447},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.2574999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hoti66940.2025.00025","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hoti66940.2025.00025","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Symposium on High-Performance Interconnects (HOTI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2473258305","display_name":null,"funder_award_id":"2311830,2312927,2323116,2415201,2504944","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2766362889","https://openalex.org/W2898319404","https://openalex.org/W2986124642","https://openalex.org/W4321636575","https://openalex.org/W4387321091","https://openalex.org/W4391993458","https://openalex.org/W4400410176","https://openalex.org/W4402389578"],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"built":[4],"on":[5],"transformer":[6],"architectures":[7,73],"have":[8],"transformed":[9],"natural":[10],"language":[11],"processing,":[12],"achieving":[13],"remarkable":[14],"performance":[15,33],"across":[16,93],"diverse":[17],"applications.":[18],"While":[19],"distributed":[20,48],"inference":[21,154],"frameworks":[22,155],"enable":[23],"practical":[24,137],"deployment":[25],"of":[26,71],"these":[27],"models,":[28],"inter-GPU":[29],"communication":[30,45,91,157],"creates":[31],"significant":[32],"constraints":[34],"that":[35,99],"limit":[36],"service":[37],"quality":[38],"in":[39,47,76,144],"real-world":[40],"systems.":[41],"This":[42],"paper":[43],"investigates":[44],"dynamics":[46],"LLM":[49,146],"serving-analyzing":[50],"how":[51],"various":[52],"parallelization":[53,95,142],"approaches":[54,126],"coordinate":[55],"data":[56,117],"exchange":[57],"between":[58],"GPU":[59],"workers":[60],"during":[61],"inference.":[62],"We":[63],"study":[64],"dense":[65],"transformer-based":[66],"models":[67,88],"as":[68],"representative":[69],"examples":[70],"contemporary":[72],"widely":[74],"used":[75],"operational":[77],"deployments.":[78],"Our":[79],"work":[80],"combines":[81],"detailed":[82],"profiling":[83],"measurements":[84],"with":[85],"predictive":[86],"analytical":[87],"to":[89,130],"characterize":[90],"behavior":[92],"different":[94],"configurations.":[96],"Results":[97],"show":[98],"tensor":[100],"parallelism":[101,115],"incurs":[102],"substantial":[103],"network":[104],"overhead":[105],"but":[106],"delivers":[107],"superior":[108],"response":[109],"times":[110],"for":[111,139,152],"brief":[112],"sequences,":[113],"pipeline":[114],"minimizes":[116],"transfer":[118],"requirements":[119],"while":[120],"increasing":[121],"total":[122],"latency,":[123],"and":[124,148,156],"combined":[125],"demand":[127],"careful":[128],"tuning":[129],"achieve":[131],"balanced":[132],"performance.":[133],"These":[134],"insights":[135],"offer":[136],"recommendations":[138],"selecting":[140],"appropriate":[141],"schemes":[143],"production":[145],"services":[147],"identify":[149],"key":[150],"opportunities":[151],"optimizing":[153],"infrastructure.":[158]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-20T00:00:00"}
