{"id":"https://openalex.org/W4414198920","doi":"https://doi.org/10.1109/dac63849.2025.11133023","title":"Hydra: Harnessing Expert Popularity for Efficient Mixture-of-Expert Inference on Chiplet System","display_name":"Hydra: Harnessing Expert Popularity for Efficient Mixture-of-Expert Inference on Chiplet System","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414198920","doi":"https://doi.org/10.1109/dac63849.2025.11133023"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11133023","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071405687","display_name":"Siqi He","orcid":"https://orcid.org/0009-0002-0916-8486"},"institutions":[{"id":"https://openalex.org/I192209268","display_name":"Shaoxing University","ror":"https://ror.org/0435tej63","country_code":"CN","type":"education","lineage":["https://openalex.org/I192209268"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Siqi He","raw_affiliation_strings":["Shaoxin Laboratory,Shaoxing,Zhejiang,China"],"affiliations":[{"raw_affiliation_string":"Shaoxin Laboratory,Shaoxing,Zhejiang,China","institution_ids":["https://openalex.org/I192209268"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037971089","display_name":"Haozhe Zhu","orcid":"https://orcid.org/0000-0002-6412-3996"},"institutions":[{"id":"https://openalex.org/I192209268","display_name":"Shaoxing University","ror":"https://ror.org/0435tej63","country_code":"CN","type":"education","lineage":["https://openalex.org/I192209268"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haozhe Zhu","raw_affiliation_strings":["Shaoxin Laboratory,Shaoxing,Zhejiang,China"],"affiliations":[{"raw_affiliation_string":"Shaoxin Laboratory,Shaoxing,Zhejiang,China","institution_ids":["https://openalex.org/I192209268"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066408819","display_name":"Jiapei Zheng","orcid":"https://orcid.org/0009-0001-8917-735X"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiapei Zheng","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101963648","display_name":"Lizhou Wu","orcid":"https://orcid.org/0009-0002-2570-7793"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lizhou Wu","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079670597","display_name":"Bo Jiao","orcid":"https://orcid.org/0000-0002-8173-8060"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Jiao","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100453158","display_name":"Qi Liu","orcid":"https://orcid.org/0000-0001-7062-831X"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Liu","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100656792","display_name":"Xiaoyang Zeng","orcid":"https://orcid.org/0000-0003-3986-137X"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyang Zeng","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051205321","display_name":"Chixiao Chen","orcid":"https://orcid.org/0000-0002-5980-4236"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chixiao Chen","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems (SKLICS),Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5071405687"],"corresponding_institution_ids":["https://openalex.org/I192209268"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23895907,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9045000076293945,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9045000076293945,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5947999954223633},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5523999929428101},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.5271000266075134},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.38119998574256897},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.36959999799728394},{"id":"https://openalex.org/keywords/expert-system","display_name":"Expert system","score":0.33559998869895935},{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.31869998574256897},{"id":"https://openalex.org/keywords/inference-engine","display_name":"Inference engine","score":0.31380000710487366}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8205000162124634},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5947999954223633},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5726000070571899},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5523999929428101},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.5271000266075134},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3917999863624573},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.38119998574256897},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.33559998869895935},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.31869998574256897},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.31380000710487366},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.28850001096725464},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C2780186347","wikidata":"https://www.wikidata.org/wiki/Q11414","display_name":"Subnetwork","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C60798267","wikidata":"https://www.wikidata.org/wiki/Q1226939","display_name":"Division (mathematics)","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2563999891281128},{"id":"https://openalex.org/C75235859","wikidata":"https://www.wikidata.org/wiki/Q582659","display_name":"Exponential growth","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11133023","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1505822854","https://openalex.org/W2062143991","https://openalex.org/W2980104813","https://openalex.org/W3016212306","https://openalex.org/W3111684448","https://openalex.org/W4220967350","https://openalex.org/W4312121019","https://openalex.org/W4321636575","https://openalex.org/W4389476193","https://openalex.org/W4393592481","https://openalex.org/W4394871723","https://openalex.org/W4400409890","https://openalex.org/W4401211627"],"related_works":[],"abstract_inverted_index":{"The":[0],"rapid":[1],"growth":[2],"of":[3,39,115,124],"model":[4,44,146],"sizes":[5],"in":[6,12,107],"advanced":[7],"artificial":[8],"intelligence":[9],"algorithms,":[10],"particularly":[11],"Transformerbased":[13],"large":[14],"language":[15],"models":[16,26],"(LLMs),":[17],"has":[18],"led":[19],"to":[20,71,83,99],"significant":[21],"computational":[22,45],"overhead.":[23],"Mixture-of-Expert":[24],"(MoE)":[25],"offer":[27],"a":[28,51,66,95,139],"solution":[29,143],"through":[30],"their":[31],"sparsely":[32],"gating":[33],"mechanism":[34],"but":[35],"introduce":[36],"new":[37],"challenges":[38],"extensive":[40],"all-to-all":[41],"communication":[42],"and":[43,94,103,118,121,127,132,141],"inefficiencies.":[46],"This":[47],"paper":[48],"presents":[49],"Hydra,":[50],"software/hardware":[52],"co-design":[53],"aimed":[54],"at":[55],"accelerating":[56],"MoE":[57,134,145],"inference":[58],"on":[59,90],"chiplet-based":[60],"architectures.":[61],"In":[62,75],"software,":[63],"Hydra":[64,111],"employs":[65],"popularity-aware":[67],"expert":[68],"mapping":[69],"strategy":[70],"optimize":[72],"interchiplet":[73],"communication.":[74],"hardware,":[76],"it":[77],"incorporates":[78],"Content":[79],"Addressable":[80],"Memory":[81],"(CAM)":[82],"eliminate":[84],"expensive":[85],"explicit":[86],"token":[87],"(un)-permutation":[88],"based":[89],"sparse":[91],"matrix":[92],"multiplications":[93],"redundant-calculation-skipping":[96],"softmax":[97],"engine":[98],"bypass":[100],"unnecessary":[101],"division":[102],"exponential":[104],"operations.":[105],"Evaluated":[106],"22":[108],"nm":[109],"technology,":[110],"achieves":[112],"latency":[113],"reductions":[114,123],"$14.2":[116],"\\times$":[117,120,126,129],"$3.5":[119],"power":[122],"$169.1":[125],"$18.9":[128],"over":[130],"GPU":[131],"state-of-the-art":[133],"accelerator,":[135],"respectively,":[136],"thereby":[137],"offering":[138],"scalable":[140],"efficient":[142],"for":[144],"deployment.":[147]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
