{"id":"https://openalex.org/W4410986266","doi":"https://doi.org/10.1109/tc.2025.3575905","title":"Serving MoE Models on Resource-Constrained Edge Devices via Dynamic Expert Swapping","display_name":"Serving MoE Models on Resource-Constrained Edge Devices via Dynamic Expert Swapping","publication_year":2025,"publication_date":"2025-06-03","ids":{"openalex":"https://openalex.org/W4410986266","doi":"https://doi.org/10.1109/tc.2025.3575905"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2025.3575905","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2025.3575905","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101585460","display_name":"Rui Kong","orcid":"https://orcid.org/0009-0003-2889-2266"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rui Kong","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104155300","display_name":"Yuanchun Li","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanchun Li","raw_affiliation_strings":["Institute of AI Industrial Research (AIR), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of AI Industrial Research (AIR), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100415467","display_name":"Weijun Wang","orcid":"https://orcid.org/0000-0002-9545-3322"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weijun Wang","raw_affiliation_strings":["Institute of AI Industrial Research (AIR), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of AI Industrial Research (AIR), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072308822","display_name":"Linghe Kong","orcid":"https://orcid.org/0000-0001-9266-3044"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linghe Kong","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":null,"display_name":"Yunxin Liu","orcid":"https://orcid.org/0000-0001-7352-8955"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunxin Liu","raw_affiliation_strings":["Institute of AI Industrial Research (AIR), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of AI Industrial Research (AIR), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101585460"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":8.0902,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.97238787,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"74","issue":"8","first_page":"2799","last_page":"2811"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9717000126838684,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9717000126838684,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9638000130653381,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9587000012397766,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6840041279792786},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.5546050071716309},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.47753751277923584},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3860039710998535},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3533981442451477},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19107091426849365},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.16261103749275208}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6840041279792786},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.5546050071716309},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.47753751277923584},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3860039710998535},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3533981442451477},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19107091426849365},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.16261103749275208}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2025.3575905","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2025.3575905","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4968693648","display_name":null,"funder_award_id":"62272261","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2117539524","https://openalex.org/W2933138175","https://openalex.org/W3165698711","https://openalex.org/W3196950043","https://openalex.org/W3207645655","https://openalex.org/W4283066310","https://openalex.org/W4283815817","https://openalex.org/W4285134706","https://openalex.org/W4312349930","https://openalex.org/W4319587035","https://openalex.org/W4385567093","https://openalex.org/W4393407138","https://openalex.org/W4401211627","https://openalex.org/W6727099177","https://openalex.org/W6732520560","https://openalex.org/W6739901393","https://openalex.org/W6772383348","https://openalex.org/W6778883912","https://openalex.org/W6780805062","https://openalex.org/W6784333009","https://openalex.org/W6788811087","https://openalex.org/W6796487566","https://openalex.org/W6810921705","https://openalex.org/W6837865058","https://openalex.org/W6839827798","https://openalex.org/W6844796666","https://openalex.org/W6845510015","https://openalex.org/W6850960556","https://openalex.org/W6855616086","https://openalex.org/W6860443818"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Mixture":[0],"of":[1,51,93,109,122],"experts":[2,111],"(MoE)":[3],"is":[4,31,95],"a":[5,96,107],"popular":[6],"technique":[7],"in":[8,26,54,112],"deep":[9],"learning":[10],"that":[11,104],"improves":[12],"model":[13,38,89,149],"capacity":[14],"with":[15,160,176],"conditionally-activated":[16],"parallel":[17],"neural":[18],"network":[19],"modules":[20],"(experts).":[21],"However,":[22],"serving":[23],"MoE":[24,52,88,129],"models":[25,53,130],"resource-constrained":[27,86],"latency-critical":[28],"edge":[29],"scenarios":[30],"challenging":[32],"due":[33],"to":[34,60,114],"the":[35,48,65,120,161],"significantly":[36],"increased":[37],"size":[39],"and":[40,72,135,148,170,174],"complexity.":[41],"In":[42],"this":[43],"paper,":[44],"we":[45,79,124],"first":[46],"analyze":[47],"behavior":[49],"pattern":[50],"continuous":[55,87],"inference":[56,83],"scenarios,":[57],"which":[58],"leads":[59],"three":[61],"key":[62],"observations":[63],"about":[64],"expert":[66],"activations,":[67],"including":[68],"temporal":[69],"locality,":[70],"exchangeability,":[71],"skippable":[73],"computation.":[74],"Based":[75],"on":[76,131,156],"these":[77],"observations,":[78],"introduce":[80],"PC-MoE,":[81,123],"an":[82],"framework":[84],"for":[85],"serving.":[90],"The":[91,140],"core":[92],"PC-MoE":[94],"new":[97],"data":[98],"structure,":[99],"<italic":[100],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[101],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Parameter":[102],"Committee</i>,":[103],"intelligently":[105],"maintains":[106],"subset":[108],"important":[110],"use":[113],"reduce":[115,167],"resource":[116,146],"consumption.":[117],"To":[118],"evaluate":[119],"effectiveness":[121],"conduct":[125],"experiments":[126],"using":[127],"state-of-the-art":[128],"common":[132],"computer":[133],"vision":[134],"natural":[136],"language":[137],"processing":[138],"tasks.":[139],"results":[141],"demonstrate":[142],"optimal":[143],"trade-offs":[144],"between":[145],"consumption":[147],"accuracy":[150,179],"achieved":[151],"by":[152,172],"PC-MoE.":[153],"For":[154],"instance,":[155],"object":[157],"detection":[158],"tasks":[159],"Swin-MoE":[162],"model,":[163],"our":[164],"approach":[165],"can":[166],"memory":[168],"usage":[169],"latency":[171],"42.34%":[173],"18.63%":[175],"only":[177],"0.10%":[178],"degradation.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
