{"id":"https://openalex.org/W4396817294","doi":"https://doi.org/10.1109/tsc.2024.3399654","title":"MoESys: A Distributed and Efficient Mixture-of-Experts Training and Inference System for Internet Services","display_name":"MoESys: A Distributed and Efficient Mixture-of-Experts Training and Inference System for Internet Services","publication_year":2024,"publication_date":"2024-05-10","ids":{"openalex":"https://openalex.org/W4396817294","doi":"https://doi.org/10.1109/tsc.2024.3399654"},"language":"en","primary_location":{"id":"doi:10.1109/tsc.2024.3399654","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsc.2024.3399654","pdf_url":null,"source":{"id":"https://openalex.org/S204223317","display_name":"IEEE Transactions on Services Computing","issn_l":"1939-1374","issn":["1939-1374","2372-0204"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Services Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084155236","display_name":"Dianhai Yu","orcid":"https://orcid.org/0000-0002-0163-2603"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dianhai Yu","raw_affiliation_strings":["Baidu, Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-0163-2603","affiliations":[{"raw_affiliation_string":"Baidu, Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026318854","display_name":"Liang Shen","orcid":"https://orcid.org/0000-0001-7976-668X"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Shen","raw_affiliation_strings":["Baidu, Inc., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu, Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100591709","display_name":"Hongxiang Hao","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongxiang Hao","raw_affiliation_strings":["Baidu, Inc., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu, Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055804012","display_name":"Weibao Gong","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weibao Gong","raw_affiliation_strings":["Baidu, Inc., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu, Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030773612","display_name":"Huachao Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huachao Wu","raw_affiliation_strings":["Baidu, Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-8545-7818","affiliations":[{"raw_affiliation_string":"Baidu, Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021438219","display_name":"Jiang Bian","orcid":"https://orcid.org/0000-0001-6997-1989"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiang Bian","raw_affiliation_strings":["Baidu, Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-6997-1989","affiliations":[{"raw_affiliation_string":"Baidu, Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057227915","display_name":"Li-Rong Dai","orcid":"https://orcid.org/0000-0002-0859-2827"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lirong Dai","raw_affiliation_strings":["Department of Electronic Engineering and Information Science, University of Science and Technology of China, Heifei, Anhui, China"],"raw_orcid":"https://orcid.org/0000-0002-0859-2827","affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering and Information Science, University of Science and Technology of China, Heifei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081254155","display_name":"Haoyi Xiong","orcid":"https://orcid.org/0000-0002-5451-3253"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyi Xiong","raw_affiliation_strings":["Baidu, Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-5451-3253","affiliations":[{"raw_affiliation_string":"Baidu, Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5084155236"],"corresponding_institution_ids":["https://openalex.org/I98301712"],"apc_list":null,"apc_paid":null,"fwci":11.936,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.98418725,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"17","issue":"5","first_page":"2626","last_page":"2639"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10679","display_name":"Service-Oriented Architecture and Web Services","score":0.8791999816894531,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10679","display_name":"Service-Oriented Architecture and Web Services","score":0.8791999816894531,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.8285999894142151,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.8233000040054321,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8323315382003784},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.7188297510147095},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6581323742866516},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6029309034347534},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3604217767715454},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3543447256088257},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.3532460331916809},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.32234930992126465},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3216206431388855}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8323315382003784},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.7188297510147095},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6581323742866516},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6029309034347534},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3604217767715454},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3543447256088257},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3532460331916809},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.32234930992126465},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3216206431388855},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsc.2024.3399654","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsc.2024.3399654","pdf_url":null,"source":{"id":"https://openalex.org/S204223317","display_name":"IEEE Transactions on Services Computing","issn_l":"1939-1374","issn":["1939-1374","2372-0204"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Services Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":78,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1859363816","https://openalex.org/W2024122052","https://openalex.org/W2171318529","https://openalex.org/W2896457183","https://openalex.org/W2913340405","https://openalex.org/W2919290281","https://openalex.org/W2933138175","https://openalex.org/W2953213487","https://openalex.org/W2963254338","https://openalex.org/W2964110616","https://openalex.org/W2973727699","https://openalex.org/W2978017171","https://openalex.org/W2979245724","https://openalex.org/W2982583481","https://openalex.org/W2984037327","https://openalex.org/W2999645065","https://openalex.org/W2999851651","https://openalex.org/W3006945411","https://openalex.org/W3010969086","https://openalex.org/W3040573126","https://openalex.org/W3081168214","https://openalex.org/W3094342783","https://openalex.org/W3105038888","https://openalex.org/W3138895808","https://openalex.org/W3169109617","https://openalex.org/W3169483174","https://openalex.org/W3170841641","https://openalex.org/W3176617251","https://openalex.org/W3192523796","https://openalex.org/W3197720002","https://openalex.org/W3199518308","https://openalex.org/W3202544039","https://openalex.org/W3205803342","https://openalex.org/W3205972749","https://openalex.org/W3206606249","https://openalex.org/W3209034179","https://openalex.org/W4200634402","https://openalex.org/W4214831727","https://openalex.org/W4221167110","https://openalex.org/W4226364033","https://openalex.org/W4226515448","https://openalex.org/W4285225635","https://openalex.org/W4287391717","https://openalex.org/W4293718192","https://openalex.org/W4296079340","https://openalex.org/W4319990461","https://openalex.org/W4323338438","https://openalex.org/W4361866125","https://openalex.org/W4380876056","https://openalex.org/W4385567093","https://openalex.org/W6631190155","https://openalex.org/W6732520560","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6767997687","https://openalex.org/W6768851824","https://openalex.org/W6769062451","https://openalex.org/W6774806506","https://openalex.org/W6778883912","https://openalex.org/W6780805062","https://openalex.org/W6784375553","https://openalex.org/W6788811087","https://openalex.org/W6793032698","https://openalex.org/W6793102544","https://openalex.org/W6799372109","https://openalex.org/W6800054401","https://openalex.org/W6801256880","https://openalex.org/W6802290083","https://openalex.org/W6802357070","https://openalex.org/W6805239564","https://openalex.org/W6810296985","https://openalex.org/W6810787498","https://openalex.org/W6811035422","https://openalex.org/W6811726652","https://openalex.org/W6839538294","https://openalex.org/W6850820320","https://openalex.org/W6891973520"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W3216976533","https://openalex.org/W100620283","https://openalex.org/W2495260952","https://openalex.org/W4366179611","https://openalex.org/W2996078371"],"abstract_inverted_index":{"While":[0,65],"modern":[1],"internet":[2],"services,":[3],"such":[4],"as":[5,144],"chatbots,":[6],"search":[7],"engines,":[8],"and":[9,23,59,84,99,118,137,181],"online":[10],"advertising,":[11],"demand":[12],"the":[13,40,46,52,80,122,125,158,168,179,183,187,235],"use":[14],"of":[15,48,55,82,175,221],"large-scale":[16,74,116],"deep":[17],"neural":[18],"networks":[19],"(DNNs),":[20],"distributed":[21],"training":[22,49,76,83,117,123,132],"inference":[24,85,151],"over":[25,77,140],"heterogeneous":[26,78],"computing":[27],"systems":[28],"are":[29],"desired":[30],"to":[31,44,51,145,177,202],"facilitate":[32],"these":[33],"DNN":[34],"models.":[35],"Mixture-of-Experts":[36,219],"(MoE)":[37],"is":[38,161],"one":[39],"most":[41],"common":[42],"strategies":[43],"lower":[45],"cost":[47],"subject":[50],"overall":[53],"size":[54,160],"models/data":[56],"through":[57],"gating":[58],"parallelism":[60],"in":[61,71,114,121,152,190,224],"a":[62,108,153,173,191,209,217],"divide-and-conquer":[63],"fashion.":[64],"DeepSpeed":[66,241],"[1]":[67],"has":[68],"made":[69],"efforts":[70],"carrying":[72],"out":[73,199],"MoE":[75,131,245],"infrastructures,":[79],"efficiency":[81,113],"could":[86],"be":[87],"further":[88],"improved":[89],"from":[90],"several":[91],"system":[92],"aspects,":[93],"including":[94],"load":[95,178],"balancing,":[96],"communication/computation":[97],"efficiency,":[98],"memory":[100,170,188],"footprint":[101],"limits.":[102],"In":[103],"this":[104],"work,":[105],"we":[106],"present":[107],"novel":[109],"MoESys":[110,127,166,206,239,249],"that":[111,238],"boosts":[112],"both":[115],"inference.":[119,196],"Specifically,":[120],"procedure,":[124],"proposed":[126],"adopts":[128],"an":[129],"Elastic":[130],"strategy":[133],"with":[134,216,242],"2D":[135],"prefetch":[136],"Fusion":[138],"communication":[139],"Hierarchical":[141],"storage,":[142],"so":[143],"enjoy":[146],"efficient":[147,195],"parallelisms.":[148],"For":[149],"scalable":[150],"single":[154],"node,":[155],"especially":[156],"when":[157],"model":[159,215,220],"larger":[162],"than":[163],"GPU":[164,230],"memory,":[165],"builds":[167],"CPU-GPU":[169],"jointly":[171],"into":[172],"ring":[174],"sections":[176,189],"model,":[180],"executes":[182],"computation":[184],"tasks":[185],"across":[186],"round-robin":[192],"manner":[193],"for":[194],"We":[197],"carried":[198],"extensive":[200],"experiments":[201],"evaluate":[203],"MoESys,":[204],"where":[205],"successfully":[207],"trains":[208],"Unified":[210],"Feature":[211],"Optimization":[212],"[2]":[213],"(UFO)":[214],"Sparsely-Gated":[218],"12B":[222],"parameters":[223],"8":[225],"days":[226],"on":[227],"48":[228],"A100":[229],"cards.":[231],"The":[232],"comparison":[233],"against":[234],"state-of-the-art":[236],"shows":[237],"outperformed":[240],"33":[243],"unbalanced":[244],"Tasks,":[246],"e.g.,":[247],"UFO,":[248],"achieved":[250],"64":[251]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":13}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
