{"id":"https://openalex.org/W4410356707","doi":"https://doi.org/10.1145/3672608.3707747","title":"Mixture of Modular Experts: Distilling Knowledge from a Multilingual Teacher into Specialized Modular Language Models","display_name":"Mixture of Modular Experts: Distilling Knowledge from a Multilingual Teacher into Specialized Modular Language Models","publication_year":2025,"publication_date":"2025-03-31","ids":{"openalex":"https://openalex.org/W4410356707","doi":"https://doi.org/10.1145/3672608.3707747"},"language":"en","primary_location":{"id":"doi:10.1145/3672608.3707747","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3672608.3707747","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 40th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060494290","display_name":"Mohammed Al-Maamari","orcid":"https://orcid.org/0000-0002-0127-8034"},"institutions":[{"id":"https://openalex.org/I186354981","display_name":"University of Passau","ror":"https://ror.org/05ydjnb78","country_code":"DE","type":"education","lineage":["https://openalex.org/I186354981"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Mohammed Al-Maamari","raw_affiliation_strings":["University of Passau, Passau, Germany"],"affiliations":[{"raw_affiliation_string":"University of Passau, Passau, Germany","institution_ids":["https://openalex.org/I186354981"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010003090","display_name":"Mehdi Ben Amor","orcid":"https://orcid.org/0000-0002-1785-8367"},"institutions":[{"id":"https://openalex.org/I186354981","display_name":"University of 
Passau","ror":"https://ror.org/05ydjnb78","country_code":"DE","type":"education","lineage":["https://openalex.org/I186354981"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Mehdi Ben Amor","raw_affiliation_strings":["University of Passau, Passau, Germany"],"affiliations":[{"raw_affiliation_string":"University of Passau, Passau, Germany","institution_ids":["https://openalex.org/I186354981"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019466280","display_name":"Jelena Mitrovi\u0107","orcid":"https://orcid.org/0000-0003-3220-8749"},"institutions":[{"id":"https://openalex.org/I186354981","display_name":"University of Passau","ror":"https://ror.org/05ydjnb78","country_code":"DE","type":"education","lineage":["https://openalex.org/I186354981"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jelena Mitrovi\u0107","raw_affiliation_strings":["Institute for Artificial Intelligence Research and Development of Serbia, Novi Sad, Serbia","University of Passau, Passau, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence Research and Development of Serbia, Novi Sad, Serbia","institution_ids":[]},{"raw_affiliation_string":"University of Passau, Passau, Germany","institution_ids":["https://openalex.org/I186354981"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006866152","display_name":"Michael Granitzer","orcid":"https://orcid.org/0000-0003-3566-5507"},"institutions":[{"id":"https://openalex.org/I186354981","display_name":"University of Passau","ror":"https://ror.org/05ydjnb78","country_code":"DE","type":"education","lineage":["https://openalex.org/I186354981"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Michael Granitzer","raw_affiliation_strings":["University of Passau, Passau, Germany"],"affiliations":[{"raw_affiliation_string":"University of Passau, Passau, 
Germany","institution_ids":["https://openalex.org/I186354981"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5060494290"],"corresponding_institution_ids":["https://openalex.org/I186354981"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12414598,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"945","last_page":"952"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10636","display_name":"Innovative Teaching and Learning Methods","score":0.5073000192642212,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10636","display_name":"Innovative Teaching and Learning Methods","score":0.5073000192642212,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12542","display_name":"Second Language Learning and Teaching","score":0.5067999958992004,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular 
design","score":0.8950079679489136},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7980560064315796},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5062962770462036},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4189353287220001},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.37596845626831055}],"concepts":[{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.8950079679489136},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7980560064315796},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5062962770462036},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4189353287220001},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.37596845626831055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3672608.3707747","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3672608.3707747","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 40th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality 
Education","score":0.800000011920929,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W3119866685","https://openalex.org/W4287121196","https://openalex.org/W4287391717","https://openalex.org/W6796487566"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Mixture":[0,98],"of":[1,14,32,99,164,167],"Experts":[2,100],"(MoE)":[3],"architectures":[4],"have":[5,222],"been":[6],"increasingly":[7],"employed":[8],"to":[9,54,91,118,170,181,228],"enhance":[10],"the":[11,30,43,146,159,182,192],"inference":[12],"speed":[13],"Large":[15],"Language":[16],"Models":[17],"(LLMs).":[18],"However,":[19],"in":[20,42,56,108,232],"previous":[21],"MoE":[22,69,117,141,200],"approaches,":[23],"there":[24],"is":[25,189],"no":[26],"clear":[27],"separation":[28],"between":[29],"specialties":[31],"each":[33,187],"expert,":[34],"as":[35],"they":[36],"divide":[37],"tasks":[38],"based":[39,85],"on":[40,47,86],"patterns":[41],"data":[44],"rather":[45],"than":[46],"specific":[48,87,171],"topics.":[49],"This":[50,155],"limits":[51],"their":[52],"ability":[53],"specialize":[55],"particular":[57],"languages":[58],"or":[59],"domains.":[60,220],"Incorporating":[61],"modularity":[62,156],"by":[63,111,191],"developing":[64],"domain-specific":[65,120,130],"experts":[66,131,168],"within":[67,138],"an":[68],"framework":[70],"could":[71],"enable":[72],"more":[73],"flexible":[74,160],"and":[75,83,106,153,162,202,215,226],"efficient":[76],"LLMs,":[77],"where":[78],"modules":[79],"can":[80],"be":[81],"selected":[82],"combined":
[84,137],"application":[88,172],"needs,":[89,173],"similar":[90],"software":[92],"libraries.":[93],"We":[94,197,221],"present":[95],"a":[96,139],"Modular":[97],"(MMoE)":[101],"architecture":[102],"that":[103,186,204],"enhances":[104],"efficiency":[105],"specialization":[107],"language":[109],"models":[110],"integrating":[112],"Knowledge":[113],"Distillation":[114],"(KD)":[115],"with":[116],"create":[119],"experts.":[121],"In":[122,143],"our":[123,144,205,224],"approach,":[124],"LLMs":[125],"are":[126,135],"compressed":[127],"into":[128],"smaller,":[129],"using":[132],"KD,":[133],"which":[134],"then":[136],"modular":[140,206],"framework.":[142],"experiments,":[145],"domains":[147],"considered":[148],"were":[149],"English,":[150],"German,":[151],"French,":[152],"Python.":[154],"allows":[157],"for":[158],"selection":[161],"deployment":[163],"any":[165],"subset":[166],"tailored":[169],"facilitating":[174],"resource-efficient":[175],"deployment.":[176],"A":[177],"router":[178],"directs":[179],"inputs":[180],"appropriate":[183],"experts,":[184],"ensuring":[185],"input":[188],"processed":[190],"most":[193],"relevant":[194],"specialized":[195],"module.":[196],"evaluated":[198],"different":[199],"configurations":[201],"found":[203],"approach":[207],"effectively":[208],"handles":[209],"multi-domain":[210],"inputs,":[211],"mitigates":[212],"catastrophic":[213],"forgetting,":[214],"preserves":[216],"knowledge":[217],"across":[218],"multiple":[219],"open-sourced":[223],"dataset1":[225],"codebase2":[227],"facilitate":[229],"further":[230],"research":[231],"this":[233],"domain.":[234]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
