{"id":"https://openalex.org/W7155538696","doi":"https://doi.org/10.48550/arxiv.2604.21072","title":"Distributed Generative Inference of LLM at Internet Scales with Multi-Dimensional Communication Optimization","display_name":"Distributed Generative Inference of LLM at Internet Scales with Multi-Dimensional Communication Optimization","publication_year":2026,"publication_date":"2026-04-22","ids":{"openalex":"https://openalex.org/W7155538696","doi":"https://doi.org/10.48550/arxiv.2604.21072"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.21072","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21072","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.21072","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045591391","display_name":"Jiu Chen","orcid":"https://orcid.org/0000-0001-8185-8575"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Jiu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083107581","display_name":"Shuangyan Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Shuangyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134546099","display_name":"Xu Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Xu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134487476","display_name":"Hexiao Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Hexiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134526810","display_name":"Xinran Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xinran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134526563","display_name":"Jie Ren","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Jie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134544105","display_name":"Dong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Dong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5045591391"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10714","display_name":"Software-Defined Networks and 5G","score":0.3197000026702881,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10714","display_name":"Software-Defined Networks and 5G","score":0.3197000026702881,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.23180000483989716,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.10930000245571136,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7777000069618225},{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.4650000035762787},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.44690001010894775},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.43320000171661377},{"id":"https://openalex.org/keywords/telecommunications-network","display_name":"Telecommunications network","score":0.4284000098705292},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4244000017642975},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.41179999709129333},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4088999927043915},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.40149998664855957}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8093000054359436},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7777000069618225},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.527899980545044},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.47209998965263367},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.4650000035762787},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.44690001010894775},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.43320000171661377},{"id":"https://openalex.org/C192126672","wikidata":"https://www.wikidata.org/wiki/Q1068715","display_name":"Telecommunications network","level":2,"score":0.4284000098705292},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4244000017642975},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.41179999709129333},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4088999927043915},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.40149998664855957},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.3792000114917755},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35199999809265137},{"id":"https://openalex.org/C101765175","wikidata":"https://www.wikidata.org/wiki/Q577764","display_name":"Communications system","level":2,"score":0.3434000015258789},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.33570000529289246},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3276999890804291},{"id":"https://openalex.org/C12269588","wikidata":"https://www.wikidata.org/wiki/Q132364","display_name":"Communications protocol","level":2,"score":0.32580000162124634},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3228999972343445},{"id":"https://openalex.org/C200157131","wikidata":"https://www.wikidata.org/wiki/Q4854763","display_name":"Bandwidth allocation","level":3,"score":0.32269999384880066},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.30149999260902405},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2757999897003174},{"id":"https://openalex.org/C158207573","wikidata":"https://www.wikidata.org/wiki/Q5747224","display_name":"Heterogeneous network","level":4,"score":0.2728999853134155},{"id":"https://openalex.org/C158156997","wikidata":"https://www.wikidata.org/wiki/Q1416645","display_name":"Models of communication","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.21072","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21072","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.21072","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21072","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Decentralized":[0],"LLM":[1,42,127],"inference":[2,43,128],"distributes":[3],"computation":[4],"among":[5],"heterogeneous":[6],"nodes":[7],"across":[8,97],"the":[9,23,30,62],"internet,":[10],"offering":[11],"a":[12,98],"performant":[13],"and":[14,50,71,82,103],"cost-efficient":[15],"solution,":[16],"alternative":[17],"to":[18,53,86,90,112,121,124],"traditional":[19],"centralized":[20],"inference.":[21],"However,":[22],"low":[24],"cross-node":[25],"network":[26,88,101],"bandwidth":[27],"makes":[28],"communication":[29,55,92],"primary":[31],"bottleneck.":[32],"In":[33],"this":[34],"paper,":[35],"we":[36],"introduce":[37],"BloomBee,":[38],"an":[39,68],"internet-scale":[40],"distributed":[41],"framework.":[44],"BloomBee":[45,60,77,96,130],"integrates":[46],"LLM-layer":[47],"assignment,":[48],"micro-batching":[49],"tensor":[51],"offloading":[52],"optimize":[54],"from":[56],"multiple":[57],"dimensions.":[58],"Additionally,":[59],"formulates":[61],"coordination":[63],"of":[64,100],"these":[65],"techniques":[66],"as":[67],"optimization":[69],"problem":[70],"solves":[72],"it":[73,106],"using":[74],"dynamic":[75],"programming.":[76],"also":[78,115],"customizes":[79],"lossless":[80],"compression":[81],"speculative":[83],"decoding":[84],"according":[85],"low-bandwidth":[87],"settings":[89],"reduce":[91],"overhead.":[93],"We":[94],"evaluate":[95],"spectrum":[99],"environments":[102],"show":[104],"that":[105],"improves":[107],"service":[108],"throughput":[109],"by":[110,119],"up":[111,120],"1.76x.":[113],"It":[114],"reduces":[116],"average":[117],"latency":[118],"43.20%":[122],"compared":[123],"state-of-the-art":[125],"decentralized":[126],"systems.":[129],"is":[131],"open-sourced.":[132]},"counts_by_year":[],"updated_date":"2026-04-25T06:06:54.107920","created_date":"2026-04-25T00:00:00"}
