{"id":"https://openalex.org/W4416036299","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.718","title":"Mixture-of-Clustered-Experts: Advancing Expert Specialization and Generalization in Instruction Tuning","display_name":"Mixture-of-Clustered-Experts: Advancing Expert Specialization and Generalization in Instruction Tuning","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036299","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.718"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.718","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.718","pdf_url":"https://aclanthology.org/2025.emnlp-main.718.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.718.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080903430","display_name":"Sugyeong Eo","orcid":"https://orcid.org/0000-0002-8008-6160"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sugyeong Eo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103382343","display_name":"Jung Jun Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jung Jun Lee","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003145618","display_name":"Chanjun Park","orcid":"https://orcid.org/0000-0002-7200-9632"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chanjun Park","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5111217720","display_name":"Heuiseok Lim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heuiseok Lim","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080903430"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.180556,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"14212","last_page":"14223"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.12319999933242798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.12319999933242798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10467","display_name":"Psychometric Methodologies and Testing","score":0.038600001484155655,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12884","display_name":"Educational Assessment and Pedagogy","score":0.03480000048875809,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5917999744415283},{"id":"https://openalex.org/keywords/expert-system","display_name":"Expert system","score":0.3555000126361847},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.3174999952316284},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.3151000142097473},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3107999861240387},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.29010000824928284}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6039000153541565},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5917999744415283},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46790000796318054},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.3555000126361847},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3174999952316284},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3107999861240387},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28870001435279846},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.718","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.718","pdf_url":"https://aclanthology.org/2025.emnlp-main.718.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.718","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.718","pdf_url":"https://aclanthology.org/2025.emnlp-main.718.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1225283220","display_name":null,"funder_award_id":"NRF-2021R","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G3034753964","display_name":null,"funder_award_id":"grant","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"},{"id":"https://openalex.org/G3071639259","display_name":null,"funder_award_id":"2021R1","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G342704958","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G3914271861","display_name":null,"funder_award_id":"2021R1A6A1A030","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G5463329283","display_name":null,"funder_award_id":"COMPA","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G6072120315","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G6181273566","display_name":null,"funder_award_id":"NRF-2021R1A6A1A03045425","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G961453132","display_name":null,"funder_award_id":"98115","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"},{"id":"https://openalex.org/G982292920","display_name":null,"funder_award_id":"NRF-20","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320332128","display_name":"Commercializations Promotion Agency for R and D Outcomes","ror":null},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036299.pdf","grobid_xml":"https://content.openalex.org/works/W4416036299.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0],"sparse":[1],"Mixture-of-Experts":[2],"(MoE)":[3],"architecture":[4],"has":[5],"emerged":[6],"as":[7],"a":[8,17,32,58,117],"highly":[9],"scalable":[10],"solution":[11],"by":[12,42],"conditionally":[13],"activating":[14],"sub-modules":[15],"without":[16],"proportional":[18],"increase":[19],"in":[20,37,64],"computational":[21],"costs.However,":[22],"improving":[23],"expert":[24,68,104],"specialization":[25,106],"to":[26,53],"enhance":[27],"performance":[28],"and":[29,129,139],"generalization":[30,132],"remains":[31],"challenge":[33],"for":[34],"MoE,":[35],"especially":[36],"instruction":[38],"tuning":[39],"scenarios":[40],"characterized":[41],"significant":[43],"input":[44],"heterogeneity.In":[45],"this":[46,55],"work,":[47],"we":[48],"propose":[49],"the":[50,65,76,80,84,87,92,109,137],"Mixture-of-Clustered-Experts":[51],"(MoCE)":[52],"address":[54],"limitation":[56],"through":[57],"dual-stage":[59],"routing":[60,70],"mechanism.The":[61],"first":[62],"stage":[63,78],"mechanism":[66],"performs":[67],"group":[69,85,105],"based":[71,98],"on":[72,99],"sequence-level":[73],"features,":[74],"while":[75,107],"second":[77],"activates":[79],"top-k":[81],"experts":[82],"within":[83],"at":[86],"token":[88],"level.This":[89],"approach":[90],"enables":[91],"effective":[93],"partitioning":[94],"of":[95,111,120,141],"heterogeneous":[96],"inputs":[97],"their":[100],"knowledge":[101],"requirements,":[102],"encouraging":[103],"maintaining":[108],"advantages":[110],"token-level":[112],"routing.We":[113],"evaluate":[114],"MoCE":[115],"across":[116],"comprehensive":[118],"set":[119],"benchmarks,":[121],"demonstrating":[122],"its":[123,130],"consistent":[124],"superiority":[125],"over":[126],"strong":[127],"baselines":[128],"enhanced":[131],"capabilities.Detailed":[133],"analysis":[134],"further":[135],"highlights":[136],"robustness":[138],"effectiveness":[140],"MoCE.":[142]},"counts_by_year":[],"updated_date":"2026-03-25T14:56:36.534964","created_date":"2025-11-08T00:00:00"}
