{"id":"https://openalex.org/W7138147885","doi":"https://doi.org/10.1609/aaai.v40i26.39341","title":"Mosaic Pruning: A Hierarchical Framework for Generalizable Pruning of Mixture-of-Experts Models","display_name":"Mosaic Pruning: A Hierarchical Framework for Generalizable Pruning of Mixture-of-Experts Models","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138147885","doi":"https://doi.org/10.1609/aaai.v40i26.39341"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i26.39341","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39341","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39341/43302","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39341/43302","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129729647","display_name":"Wentao Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wentao Hu","raw_affiliation_strings":["Xi'an Jiaotong University and Institute of Artificial Intelligence (TeleAI), China Telecom"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University and Institute of Artificial Intelligence (TeleAI), China Telecom","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056276478","display_name":"Mingkuan Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingkuan Zhao","raw_affiliation_strings":["Xi'an Jiaotong University"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129736074","display_name":"Shuangyong Song","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]},{"id":"https://openalex.org/I4387153335","display_name":"China Telecom","ror":"https://ror.org/05p67dv18","country_code":null,"type":"company","lineage":["https://openalex.org/I4387153335"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuangyong Song","raw_affiliation_strings":["Institute of Artificial Intelligence (TeleAI), China Telecom"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence (TeleAI), China Telecom","institution_ids":["https://openalex.org/I4210136246","https://openalex.org/I4387153335"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129652451","display_name":"Xiaoyan Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyan Zhu","raw_affiliation_strings":["Xi'an Jiaotong University"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129663146","display_name":"Xin Lai","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Lai","raw_affiliation_strings":["Xi'an Jiaotong University"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129706927","display_name":"Jiayin Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiayin Wang","raw_affiliation_strings":["Xi'an Jiaotong University"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5129729647"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36600959,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"26","first_page":"21885","last_page":"21893"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.18719999492168427,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.18719999492168427,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.16699999570846558,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.10670000314712524,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.8238999843597412},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5760999917984009},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5403000116348267},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4966000020503998},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.49540001153945923},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4853000044822693},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4514999985694885},{"id":"https://openalex.org/keywords/mosaic","display_name":"Mosaic","score":0.4320000112056732}],"concepts":[{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.8238999843597412},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7558000087738037},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5921000242233276},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5760999917984009},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5694000124931335},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5403000116348267},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4966000020503998},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.49540001153945923},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4853000044822693},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4514999985694885},{"id":"https://openalex.org/C110739175","wikidata":"https://www.wikidata.org/wiki/Q133067","display_name":"Mosaic","level":2,"score":0.4320000112056732},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4106000065803528},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4007999897003174},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.391400009393692},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3864000141620636},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.33500000834465027},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3292999863624573},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.3264000117778778},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.31859999895095825},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.3070000112056732},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26100000739097595}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i26.39341","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39341","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39341/43302","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i26.39341","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39341","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39341/43302","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6671099662780762,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2715084110","display_name":null,"funder_award_id":"62402376","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3180425335","display_name":null,"funder_award_id":"72293581","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G516663181","display_name":null,"funder_award_id":"72274152","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5733390078","display_name":null,"funder_award_id":"Grant Nos.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138147885.pdf","grobid_xml":"https://content.openalex.org/works/W7138147885.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sparse":[0],"Mixture-of-Experts":[1],"(SMoE)":[2],"architectures":[3],"have":[4],"enabled":[5],"a":[6,21,63,70,73,87,112,119,126,164,177,188,193,225],"new":[7,92],"frontier":[8],"in":[9],"scaling":[10],"Large":[11],"Language":[12],"Models":[13],"(LLMs),":[14],"offering":[15],"superior":[16],"performance":[17,75,132],"by":[18,36],"activating":[19],"only":[20],"fraction":[22],"of":[23,107,116,181,187,196,216],"their":[24,30,59],"total":[25],"parameters":[26],"during":[27],"inference.":[28],"However,":[29],"practical":[31],"deployment":[32],"is":[33,81,109],"severely":[34],"hampered":[35],"substantial":[37],"static":[38],"memory":[39],"overhead,":[40],"as":[41],"all":[42],"experts":[43,117],"must":[44],"be":[45],"loaded":[46],"into":[47],"memory.":[48],"Existing":[49],"post-training":[50],"pruning":[51,60],"methods,":[52],"while":[53],"reducing":[54],"model":[55,80,175],"size,":[56],"often":[57],"derive":[58],"criteria":[61],"from":[62,149],"single,":[64],"general-purpose":[65],"corpus.":[66],"This":[67,123],"leads":[68],"to":[69,83,110,137,203],"critical":[71],"limitation:":[72],"catastrophic":[74],"degradation":[76],"when":[77],"the":[78,140,145,173,185,197,214],"pruned":[79,174],"applied":[82],"other":[84],"domains,":[85],"necessitating":[86],"costly":[88],"re-pruning":[89],"for":[90,163],"each":[91,150],"domain.":[93],"To":[94],"address":[95],"this":[96],"generalization":[97],"gap,":[98],"we":[99],"introduce":[100],"Mosaic":[101,169],"Pruning":[102,170],"(MoP).":[103],"The":[104],"core":[105],"idea":[106],"MoP":[108,219],"construct":[111],"functionally":[113,138,178],"comprehensive":[114],"set":[115,180],"through":[118],"structured":[120],"``cluster-then-select\"":[121],"process.":[122],"process":[124],"leverages":[125],"similarity":[127],"metric":[128],"that":[129,161,172,190],"captures":[130],"expert":[131,148],"across":[133],"different":[134],"task":[135],"domains":[136],"cluster":[139,151],"experts,":[141,182],"and":[142,231,239],"subsequently":[143],"selects":[144],"most":[146],"representative":[147],"based":[152],"on":[153,209,228,233],"our":[154,167,217],"proposed":[155,168],"Activation":[156],"Variability":[157],"Score.":[158],"Unlike":[159],"methods":[160],"optimize":[162],"single":[165],"corpus,":[166],"ensures":[171],"retains":[176],"complementary":[179],"much":[183],"like":[184,236],"tiles":[186],"mosaic":[189],"together":[191],"form":[192],"complete":[194],"picture":[195],"original":[198],"model's":[199],"capabilities,":[200],"enabling":[201],"it":[202],"handle":[204],"diverse":[205],"downstream":[206],"tasks.Extensive":[207],"experiments":[208],"various":[210],"MoE":[211],"models":[212],"demonstrate":[213],"superiority":[215],"approach.":[218],"significantly":[220],"outperforms":[221],"prior":[222],"work,":[223],"achieving":[224],"7.24\\%":[226],"gain":[227],"general":[229],"tasks":[230,235],"8.92\\%":[232],"specialized":[234],"math":[237],"reasoning":[238],"code":[240],"generation.":[241]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2026-03-18T00:00:00"}
