{"id":"https://openalex.org/W7133527484","doi":"https://doi.org/10.1109/hpca68181.2026.11408594","title":"MoEntwine: Unleashing the Potential of Wafer-Scale Chips for Large-Scale Expert Parallel Inference","display_name":"MoEntwine: Unleashing the Potential of Wafer-Scale Chips for Large-Scale Expert Parallel Inference","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7133527484","doi":"https://doi.org/10.1109/hpca68181.2026.11408594"},"language":null,"primary_location":{"id":"doi:10.1109/hpca68181.2026.11408594","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408594","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103213494","display_name":"Xinru Tang","orcid":"https://orcid.org/0009-0004-6038-3709"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinru Tang","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084637087","display_name":"Jingxiang Hou","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingxiang Hou","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102699169","display_name":"Dingcheng Jiang","orcid":"https://orcid.org/0009-0004-4379-694X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dingcheng Jiang","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069838515","display_name":"Tingcha Wei","orcid":"https://orcid.org/0000-0001-7619-4313"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Taiquan Wei","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128045138","display_name":"Jiaxin Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxin Liu","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070641802","display_name":"Jinyi Deng","orcid":"https://orcid.org/0000-0001-8666-8463"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinyi Deng","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128126107","display_name":"Huizheng Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huizheng Wang","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128087423","display_name":"Qize Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qize Yang","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101256935","display_name":"Haoran Shang","orcid":"https://orcid.org/0009-0004-6972-9235"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoran Shang","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007581833","display_name":"Chang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Li","raw_affiliation_strings":["Shanghai Jiao Tong University,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068155662","display_name":"Yuanming Hu","orcid":"https://orcid.org/0000-0002-1136-9909"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Hu","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078843314","display_name":"Shouyi Yin","orcid":"https://orcid.org/0000-0002-8438-8588"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shouyi Yin","raw_affiliation_strings":["Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, School of Integrated Circuits,BNRist,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5103213494"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":39.4476,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.99552182,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"15"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.2565000057220459,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.2565000057220459,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.09830000251531601,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.08299999684095383,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/expert-system","display_name":"Expert system","score":0.4260999858379364},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4027000069618225},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3034999966621399},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.23999999463558197},{"id":"https://openalex.org/keywords/inference-engine","display_name":"Inference engine","score":0.23729999363422394}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6721000075340271},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.4260999858379364},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42559999227523804},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4027000069618225},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2700999975204468},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.23999999463558197},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.23729999363422394},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.23600000143051147},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.22390000522136688}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca68181.2026.11408594","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408594","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1187591493","display_name":null,"funder_award_id":"2022ZD0115200","funder_id":"https://openalex.org/F4320329860","funder_display_name":"National Science and Technology Major Project"},{"id":"https://openalex.org/G8488329147","display_name":null,"funder_award_id":"62125403,62502255,U24A20234,92464302,U24B20164","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329777","display_name":"Beijing National Research Center For Information Science And Technology","ror":null},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1964981582","https://openalex.org/W2613168994","https://openalex.org/W2749199255","https://openalex.org/W2794478957","https://openalex.org/W2884071170","https://openalex.org/W3096609285","https://openalex.org/W3211730428","https://openalex.org/W4220967350","https://openalex.org/W4297097426","https://openalex.org/W4360605382","https://openalex.org/W4381886086","https://openalex.org/W4386260498","https://openalex.org/W4387321091","https://openalex.org/W4392567268","https://openalex.org/W4401023847","https://openalex.org/W4401508784","https://openalex.org/W4402475845","https://openalex.org/W4404955085","https://openalex.org/W4405220108","https://openalex.org/W4405910844","https://openalex.org/W4406703768","https://openalex.org/W4409248734","https://openalex.org/W4411486243","https://openalex.org/W4411486315","https://openalex.org/W4411486513","https://openalex.org/W4411486567","https://openalex.org/W4411688694","https://openalex.org/W4415792806","https://openalex.org/W4415796399","https://openalex.org/W7133217344"],"related_works":[],"abstract_inverted_index":{"As":[0],"large":[1],"language":[2],"models":[3,20],"(LLMs)":[4],"continue":[5],"to":[6,26,34,95,113,142,171,207,244,247],"scale":[7],"up,":[8],"mixture-of-experts":[9],"(MoE)":[10],"has":[11],"become":[12],"a":[13,64,70,75,83,96,184],"common":[14],"technology":[15],"in":[16,42,162,219],"SOTA":[17,228],"models.":[18,89],"MoE":[19,88,140,166,220,241],"rely":[21],"on":[22,69,117],"expert":[23,115,186],"parallelism":[24],"(EP)":[25],"alleviate":[27],"memory":[28],"bottleneck,":[29],"which":[30,133,182],"introduces":[31],"all-to-all":[32,50],"communication":[33,48,101,144,204],"dispatch":[35],"and":[36,103,139,146,159,165,191,215,222],"combine":[37],"tokens":[38],"across":[39],"devices.":[40],"However,":[41],"widely-adopted":[43],"GPU":[44],"clusters,":[45],"high-overhead":[46,114],"crossnode":[47],"makes":[49],"expensive,":[51],"hindering":[52],"the":[53,107,118,135,155,163,173,178,194,227,231],"adoption":[54],"of":[55,109,137,157,197],"EP.":[56,249],"Recently,":[57],"wafer-scale":[58],"chips":[59],"(WSCs)":[60],"have":[61],"emerged":[62],"as":[63],"platform":[65,233],"integrating":[66],"numerous":[67],"devices":[68],"wafer-sized":[71],"interposer.":[72],"WSCs":[73],"provide":[74],"unified":[76],"high-performance":[77],"network":[78,92],"connecting":[79],"all":[80],"devices,":[81],"presenting":[82],"promising":[84],"potential":[85],"for":[86],"hosting":[87],"Yet,":[90],"their":[91],"is":[93,168],"restricted":[94],"mesh":[97],"topology,":[98],"causing":[99],"imbalanced":[100],"pressure":[102,145],"performance":[104,242],"loss.":[105],"Moreover,":[106],"lack":[108],"on-wafer":[110],"disk":[111],"leads":[112],"migration":[116,174,187],"critical":[119],"path.":[120],"To":[121],"fully":[122],"unleash":[123],"this":[124],"potential,":[125],"we":[126,176],"first":[127],"propose":[128,177],"Entwined":[129],"Ring":[130],"Mapping":[131],"(ER-Mapping),":[132],"co-designs":[134],"mapping":[136],"attention":[138,164],"layers":[141,167],"balance":[143],"achieve":[147],"better":[148],"performance.":[149],"We":[150],"find":[151],"that":[152],"under":[153],"ER-Mapping,":[154],"distribution":[156],"cold":[158,195],"hot":[160],"links":[161,196],"complementary.":[169],"Therefore,":[170],"hide":[172],"overhead,":[175],"Non-invasive":[179],"Balancer":[180],"(NI-Balancer),":[181],"splits":[183],"complete":[185],"into":[188],"multiple":[189],"steps":[190],"alternately":[192],"utilizes":[193],"both":[198],"layers.":[199],"Evaluation":[200],"shows":[201],"ER-Mapping":[202],"achieves":[203],"reduction":[205],"up":[206],"62":[208],"%.":[209],"NIBalancer":[210],"further":[211],"delivers":[212,234],"54":[213],"%":[214,217,238],"22":[216],"improvements":[218],"computation":[221],"communication,":[223],"respectively.":[224],"Compared":[225],"with":[226],"NVL72":[229],"supernode,":[230],"WSC":[232],"an":[235],"average":[236],"39":[237],"higher":[239],"per-device":[240],"owing":[243],"its":[245],"scalability":[246],"larger":[248]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-03-05T00:00:00"}
