{"id":"https://openalex.org/W4416004184","doi":"https://doi.org/10.1145/3731599.3767377","title":"Compression Error Sensitivity Analysis for Different Experts in MoE Model Inference","display_name":"Compression Error Sensitivity Analysis for Different Experts in MoE Model Inference","publication_year":2025,"publication_date":"2025-11-07","ids":{"openalex":"https://openalex.org/W4416004184","doi":"https://doi.org/10.1145/3731599.3767377"},"language":null,"primary_location":{"id":"doi:10.1145/3731599.3767377","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767377","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3731599.3767377","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103443314","display_name":"Songkai Ma","orcid":"https://orcid.org/0009-0001-1956-096X"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Songkai Ma","raw_affiliation_strings":["Department of Computing, Hong Kong Polytechnic University, Hong Hong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computing, Hong Kong Polytechnic University, Hong Hong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101836874","display_name":"Zhaorui Zhang","orcid":"https://orcid.org/0000-0003-0284-1113"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Zhaorui Zhang","raw_affiliation_strings":["Department of Computing, The Hong Kong Polytechnic University, Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103124363","display_name":"Sheng Di","orcid":"https://orcid.org/0000-0002-9935-5674"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sheng Di","raw_affiliation_strings":["Argonne National Labratory, Argonne National Laboratory (ANL), DuPage, Illinois, USA"],"affiliations":[{"raw_affiliation_string":"Argonne National Labratory, Argonne National Laboratory (ANL), DuPage, Illinois, USA","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081645974","display_name":"Benben Liu","orcid":"https://orcid.org/0009-0009-8300-9740"},"institutions":[{"id":"https://openalex.org/I4210131801","display_name":"Hong Kong R&D Centre for Logistics and Supply Chain Management Enabling Technologies","ror":"https://ror.org/03nm59d75","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210131801"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Benben Liu","raw_affiliation_strings":["LSCM, The University of Hong Kong, Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"LSCM, The University of Hong Kong, Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I889458895","https://openalex.org/I4210131801"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102950844","display_name":"Xiaodong Yu","orcid":"https://orcid.org/0000-0003-2432-6171"},"institutions":[{"id":"https://openalex.org/I108468826","display_name":"Stevens Institute of Technology","ror":"https://ror.org/02z43xh36","country_code":"US","type":"education","lineage":["https://openalex.org/I108468826"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaodong Yu","raw_affiliation_strings":["Department of Computer Science, Stevens Institute of Technology, Hoboken, New Jersey, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Stevens Institute of Technology, Hoboken, New Jersey, USA","institution_ids":["https://openalex.org/I108468826"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067748041","display_name":"Xiaoyi Lu","orcid":"https://orcid.org/0000-0001-7581-8905"},"institutions":[{"id":"https://openalex.org/I156087764","display_name":"University of California, Merced","ror":"https://ror.org/00d9ah105","country_code":"US","type":"education","lineage":["https://openalex.org/I156087764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoyi Lu","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California Merced, Merced, California, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California Merced, Merced, California, USA","institution_ids":["https://openalex.org/I156087764"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100411794","display_name":"Dan Wang","orcid":"https://orcid.org/0000-0002-0921-2726"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Dan Wang","raw_affiliation_strings":["Department of Computing, Hong Kong Polytechnic University, Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computing, Hong Kong Polytechnic University, Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5103443314"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":4.7137,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.95303994,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"339","last_page":"348"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.2280000001192093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.2280000001192093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.08340000361204147,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.06539999693632126,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6728000044822693},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.5737000107765198},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.49149999022483826},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.47350001335144043},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.46939998865127563},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.33899998664855957}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7533000111579895},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6728000044822693},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.5737000107765198},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5314000248908997},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49320000410079956},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.49149999022483826},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.47350001335144043},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.46939998865127563},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38659998774528503},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.33899998664855957},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.27160000801086426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3731599.3767377","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767377","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3731599.3767377","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767377","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5666201409","display_name":null,"funder_award_id":"Office of Science, Advanced Scientific Computing Research (ASCR), DE-AC02-06CH11357","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8414120076","display_name":null,"funder_award_id":"62302420","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W3205035978","https://openalex.org/W3205120144","https://openalex.org/W4226076355","https://openalex.org/W4388105305","https://openalex.org/W4389162698","https://openalex.org/W4400409906","https://openalex.org/W4401211627","https://openalex.org/W4405756077","https://openalex.org/W4407197341","https://openalex.org/W4409882952","https://openalex.org/W4410040007","https://openalex.org/W4410583234","https://openalex.org/W4411346147","https://openalex.org/W4412802867","https://openalex.org/W4412944360"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,12,33,54,58,61,76,81,85],"widespread":[2],"application":[3],"of":[4,6,14,56],"Mixture":[5],"Experts":[7],"(MoE)":[8],"reasoning":[9],"models":[10,20],"in":[11],"field":[13],"LLM":[15],"learning,":[16],"efficiently":[17],"serving":[18],"MoE":[19],"under":[21],"limited":[22],"GPU":[23,62],"memory":[24,38,63],"constraints":[25],"has":[26,39],"emerged":[27],"as":[28,42],"a":[29,49],"significant":[30],"challenge.":[31],"Offloading":[32],"non-activated":[34],"experts":[35],"to":[36,46,69,74],"main":[37,65],"been":[40],"identified":[41],"an":[43,71],"efficient":[44,72],"approach":[45,73],"address":[47],"such":[48],"problem,":[50],"while":[51],"it":[52],"brings":[53],"challenges":[55],"transferring":[57],"expert":[59,77],"between":[60],"and":[64,78],"memory.":[66],"We":[67],"need":[68],"explore":[70],"compress":[75],"analyze":[79],"how":[80],"compression":[82],"error":[83],"affects":[84],"inference":[86],"performance.":[87]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-07T00:00:00"}
