{"id":"https://openalex.org/W4415537993","doi":"https://doi.org/10.1145/3746027.3755754","title":"Input Domain Aware MoE: Decoupling Routing Decisions from Task Optimization in Mixture of Experts","display_name":"Input Domain Aware MoE: Decoupling Routing Decisions from Task Optimization in Mixture of Experts","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415537993","doi":"https://doi.org/10.1145/3746027.3755754"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755754","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755754","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.16448","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042417337","display_name":"Yupeng Hua","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"YongXiang Hua","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China"],"raw_orcid":"https://orcid.org/0009-0008-8849-4717","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068313625","display_name":"Haoyu Cao","orcid":"https://orcid.org/0000-0002-3789-9705"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyu Cao","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0002-3789-9705","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhou Tao","orcid":"https://orcid.org/0009-0005-4433-4373"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhou Tao","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China"],"raw_orcid":"https://orcid.org/0009-0005-4433-4373","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102254671","display_name":"Bocheng Li","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bocheng Li","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China"],"raw_orcid":"https://orcid.org/0009-0005-2171-7902","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zihao Wu","orcid":"https://orcid.org/0009-0007-2199-8518"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihao Wu","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China"],"raw_orcid":"https://orcid.org/0009-0007-2199-8518","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061846588","display_name":"Chaohu Liu","orcid":"https://orcid.org/0009-0001-7588-4264"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaohu Liu","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China"],"raw_orcid":"https://orcid.org/0009-0001-7588-4264","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009732907","display_name":"Linli Xu","orcid":"https://orcid.org/0000-0003-0227-3793"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linli Xu","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0003-0227-3793","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China and State Key Laboratory of Cognitive Intelligence, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5042417337"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":1.0362,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82145124,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"5110","last_page":"5119"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6682999730110168},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5867999792098999},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.5199000239372253},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5177000164985657},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.503600001335144},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.478300005197525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7724000215530396},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6682999730110168},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5867999792098999},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.5199000239372253},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5177000164985657},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.503600001335144},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.478300005197525},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4235000014305115},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40299999713897705},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.39899998903274536},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.361299991607666},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.3467000126838684},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.34389999508857727},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.30869999527931213},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2971999943256378},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2669999897480011}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3746027.3755754","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755754","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2510.16448","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.16448","pdf_url":"https://arxiv.org/pdf/2510.16448","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.16448","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.16448","pdf_url":"https://arxiv.org/pdf/2510.16448","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2963518342","https://openalex.org/W2963622213","https://openalex.org/W2979382951","https://openalex.org/W4220967350","https://openalex.org/W4364382874","https://openalex.org/W4390874708","https://openalex.org/W4401362547","https://openalex.org/W4402704527","https://openalex.org/W4402727885"],"related_works":[],"abstract_inverted_index":{"Sparse":[0],"Mixture":[1],"of":[2,26,91,115],"Experts":[3],"(sMoE)":[4],"has":[5],"become":[6],"a":[7,49,68,74,89],"pivotal":[8],"approach":[9],"for":[10,119],"scaling":[11],"large":[12],"vision-language":[13,129],"models,":[14],"offering":[15],"substantial":[16],"capacity":[17],"while":[18,102],"maintaining":[19],"computational":[20],"efficiency":[21],"through":[22],"dynamic,":[23],"sparse":[24],"activation":[25],"experts.":[27],"However,":[28],"existing":[29,137],"routing":[30,70,86,110],"mechanisms,":[31],"typically":[32],"based":[33],"on":[34,128],"similarity":[35],"scoring,":[36],"struggle":[37],"to":[38,48,78,97],"effectively":[39],"capture":[40],"the":[41,81],"underlying":[42],"input":[43,82],"structure.":[44],"This":[45],"limitation":[46],"leads":[47],"trade-off":[50],"between":[51],"expert":[52,124,146],"specialization":[53,100],"and":[54,60,122,144],"balanced":[55,104],"computation,":[56],"hindering":[57],"both":[58],"scalability":[59],"performance.":[61],"We":[62],"propose":[63],"Input":[64],"Domain":[65],"Aware":[66],"MoE,":[67],"novel":[69],"framework":[71],"that":[72,132],"leverages":[73],"probabilistic":[75],"mixture":[76,90],"model":[77],"better":[79],"partition":[80],"space.":[83],"By":[84],"modeling":[85],"probabilities":[87],"as":[88],"distributions,":[92],"our":[93,109,133],"method":[94,134],"enables":[95],"experts":[96],"develop":[98],"clear":[99],"boundaries":[101],"achieving":[103,140],"utilization.":[105],"Unlike":[106],"conventional":[107],"approaches,":[108,139],"mechanism":[111],"is":[112],"trained":[113],"independently":[114],"task-specific":[116],"objectives,":[117],"allowing":[118],"stable":[120],"optimization":[121],"decisive":[123],"assignments.":[125],"Empirical":[126],"results":[127],"tasks":[130],"demonstrate":[131],"consistently":[135],"outperforms":[136],"sMoE":[138],"higher":[141],"task":[142],"performance":[143],"improved":[145],"utilization":[147],"balance.":[148]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-16T08:24:45.110214","created_date":"2025-10-25T00:00:00"}
