{"id":"https://openalex.org/W7156646573","doi":"https://doi.org/10.48550/arxiv.2604.23150","title":"Scaling Multi-Node Mixture-of-Experts Inference Using Expert Activation Patterns","display_name":"Scaling Multi-Node Mixture-of-Experts Inference Using Expert Activation Patterns","publication_year":2026,"publication_date":"2026-04-25","ids":{"openalex":"https://openalex.org/W7156646573","doi":"https://doi.org/10.48550/arxiv.2604.23150"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.23150","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.23150","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.23150","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134749594","display_name":"Abhimanyu Bambhaniya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bambhaniya, Abhimanyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069792279","display_name":"Geonhwa Jeong","orcid":"https://orcid.org/0000-0001-6659-3927"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jeong, Geonhwa","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134765785","display_name":"Jason Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Jason","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008163714","display_name":"Jiecao Yu","orcid":"https://orcid.org/0000-0003-2085-0312"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Jiecao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134787289","display_name":"Jaewon Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Jaewon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134811399","display_name":"Pengchao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Pengchao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101667271","display_name":"Changkyu Kim","orcid":"https://orcid.org/0000-0002-0283-8371"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Changkyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070094618","display_name":"Chunqiang Tang","orcid":"https://orcid.org/0009-0004-0133-4800"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Chunqiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134816402","display_name":"Tushar Krishna","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krishna, Tushar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.33660000562667847,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.33660000562667847,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.04529999941587448,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.03739999979734421,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6877999901771545},{"id":"https://openalex.org/keywords/expert-system","display_name":"Expert system","score":0.5544999837875366},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5512999892234802},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4961000084877014},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4758000075817108},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4478999972343445},{"id":"https://openalex.org/keywords/subject-matter-expert","display_name":"Subject-matter expert","score":0.4433000087738037},{"id":"https://openalex.org/keywords/popularity","display_name":"Popularity","score":0.43130001425743103}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7409999966621399},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6877999901771545},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.5544999837875366},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5512999892234802},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5325999855995178},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4961000084877014},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4758000075817108},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47360000014305115},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4478999972343445},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.4433000087738037},{"id":"https://openalex.org/C2780586970","wikidata":"https://www.wikidata.org/wiki/Q1357284","display_name":"Popularity","level":2,"score":0.43130001425743103},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4092000126838684},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.33410000801086426},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.328000009059906},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.32330000400543213},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.3050000071525574},{"id":"https://openalex.org/C3020580240","wikidata":"https://www.wikidata.org/wiki/Q663272","display_name":"Expert opinion","level":2,"score":0.3043000102043152},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2994000017642975},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2922999858856201},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C106347477","wikidata":"https://www.wikidata.org/wiki/Q5384228","display_name":"Equating","level":3,"score":0.267300009727478},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.26249998807907104}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.23150","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.23150","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.23150","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.23150","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Most":[0],"recent":[1],"state-of-the-art":[2],"(SOTA)":[3],"large":[4],"language":[5],"models":[6,167],"(LLMs)":[7],"use":[8],"Mixture-of-Experts":[9],"(MoE)":[10],"architectures":[11],"to":[12,52,55,154,158,178],"scale":[13,31],"model":[14],"capacity":[15],"without":[16],"proportional":[17],"per-token":[18],"compute,":[19],"enabling":[20],"higher-quality":[21],"outputs":[22],"at":[23,30],"manageable":[24],"serving":[25],"costs.":[26],"However,":[27],"MoE":[28,74,110,183],"inference":[29],"is":[32],"fundamentally":[33],"bottlenecked":[34],"by":[35,141],"expert":[36,92,98,113,117,120,138,151],"load":[37,114],"imbalance":[38],"and":[39,82,87,130,136,149,168,186],"inefficient":[40],"token":[41,156],"routing,":[42],"especially":[43],"in":[44,59,181],"multi-node":[45],"deployments":[46],"where":[47,119],"tokens":[48],"are":[49],"not":[50],"guaranteed":[51],"be":[53],"routed":[54],"local":[56],"experts,":[57],"resulting":[58,180],"significant":[60],"inter-node":[61,164],"all-to-all":[62],"communication":[63,175],"overhead.":[64],"To":[65],"systematically":[66],"characterize":[67],"these":[68,142,170],"challenges,":[69],"we":[70,101,144],"profile":[71],"SOTA":[72],"open-source":[73],"models,":[75],"including":[76],"Llama":[77],"4":[78],"Maverick,":[79],"DeepSeek":[80],"V3-671B,":[81],"Qwen3-230B-A22B,":[83],"on":[84],"various":[85,103],"datasets":[86],"collected":[88],"over":[89],"100k":[90],"real":[91],"activation":[93,99,118],"traces.":[94],"Upon":[95],"studying":[96],"the":[97,108,159],"patterns,":[100],"uncover":[102],"persistent":[104],"properties":[105],"across":[106,123],"all":[107],"frontier":[109],"models:":[111],"variable":[112],"imbalance,":[115],"domain-specific":[116],"popularity":[121],"shifts":[122],"task":[124],"families":[125],"(code,":[126],"math,":[127],"chat,":[128],"general),":[129],"a":[131],"strong":[132],"correlation":[133],"between":[134],"prefill":[135],"decode":[137,184],"activations.":[139],"Motivated":[140],"findings,":[143],"propose":[145],"workload-aware":[146],"micro-batch":[147],"grouping":[148],"an":[150],"placement":[152],"strategy":[153],"maximize":[155],"locality":[157],"destination":[160],"expert,":[161],"thereby":[162],"reducing":[163],"communication.":[165],"Across":[166],"datasets,":[169],"optimizations":[171],"help":[172],"reduce":[173],"all2all":[174],"data":[176],"up":[177],"20,":[179],"lower":[182],"latency":[185],"better":[187],"accelerator":[188],"utilization.":[189]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-29T00:00:00"}
