{"id":"https://openalex.org/W7154708512","doi":"https://doi.org/10.48550/arxiv.2604.14246","title":"Awakening Dormant Experts:Counterfactual Routing to Mitigate MoE Hallucinations","display_name":"Awakening Dormant Experts:Counterfactual Routing to Mitigate MoE Hallucinations","publication_year":2026,"publication_date":"2026-04-15","ids":{"openalex":"https://openalex.org/W7154708512","doi":"https://doi.org/10.48550/arxiv.2604.14246"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.14246","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14246","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.14246","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133902774","display_name":"Wentao Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hu, Wentao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122982641","display_name":"Yanbo Zhai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhai, Yanbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133873303","display_name":"Xiaohui Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Xiaohui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056276478","display_name":"Mingkuan Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Mingkuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133865709","display_name":"Shanhong yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"yu, Shanhong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133917371","display_name":"Xue Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133834621","display_name":"Kaidong Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Kaidong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133915882","display_name":"Shuangyong Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Shuangyong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133874646","display_name":"Xuelong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xuelong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5133902774"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.20829999446868896,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.20829999446868896,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.06270000338554382,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.05510000139474869,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.8482000231742859},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6014999747276306},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5830000042915344},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.5116999745368958},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.4415000081062317},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.43529999256134033},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.40389999747276306},{"id":"https://openalex.org/keywords/performance-metric","display_name":"Performance metric","score":0.319599986076355}],"concepts":[{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.8482000231742859},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6675000190734863},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6014999747276306},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5830000042915344},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.5116999745368958},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.4415000081062317},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.43529999256134033},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4196000099182129},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.40389999747276306},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37439998984336853},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.319599986076355},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.31769999861717224},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.31709998846054077},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C2775953691","wikidata":"https://www.wikidata.org/wiki/Q5013874","display_name":"CRFS","level":3,"score":0.28220000863075256},{"id":"https://openalex.org/C195487862","wikidata":"https://www.wikidata.org/wiki/Q850210","display_name":"Revenue","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27459999918937683},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.25999999046325684},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.25220000743865967},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.25189998745918274},{"id":"https://openalex.org/C2777087702","wikidata":"https://www.wikidata.org/wiki/Q7306421","display_name":"Redux","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.14246","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14246","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.14246","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14246","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sparse":[0],"Mixture-of-Experts":[1],"(MoE)":[2],"models":[3],"have":[4],"achieved":[5],"remarkable":[6],"scalability,":[7],"yet":[8],"they":[9],"remain":[10,53],"vulnerable":[11],"to":[12,31,81,98,105,151],"hallucinations,":[13],"particularly":[14],"when":[15],"processing":[16],"long-tail":[17,44],"knowledge.":[18],"We":[19],"identify":[20],"that":[21,131],"this":[22],"fragility":[23],"stems":[24],"from":[25,103],"static":[26,152],"Top-$k$":[27],"routing:":[28],"routers":[29],"tend":[30],"favor":[32],"high-frequency":[33],"patterns":[34],"over":[35],"rare":[36],"factual":[37,134],"associations.":[38],"Consequently,":[39],"``specialist":[40],"experts''":[41],"possessing":[42],"critical":[43],"knowledge":[45],"are":[46],"often":[47],"assigned":[48],"low":[49],"gating":[50],"scores":[51],"and":[52,128],"``dormant''":[54],"--":[55],"under-prioritized":[56],"for":[57],"specific":[58],"tokens":[59],"despite":[60],"their":[61],"proven":[62],"causal":[63],"importance":[64],"on":[65,125,138],"other":[66],"inputs.":[67],"To":[68],"address":[69],"this,":[70],"we":[71],"propose":[72],"Counterfactual":[73,93],"Routing":[74],"(CoR),":[75],"a":[76,110,146],"training-free":[77],"inference":[78,143],"framework":[79],"designed":[80],"awaken":[82],"these":[83],"dormant":[84],"experts.":[85],"CoR":[86,132],"integrates":[87],"layer-wise":[88],"perturbation":[89],"analysis":[90],"with":[91],"the":[92,142],"Expert":[94],"Impact":[95],"(CEI)":[96],"metric":[97],"dynamically":[99],"shift":[100],"computational":[101],"resources":[102],"syntax-dominant":[104],"knowledge-intensive":[106],"layers":[107],"while":[108],"maintaining":[109],"constant":[111],"total":[112],"activation":[113],"count,":[114],"effectively":[115],"retrieving":[116],"causally":[117],"decisive":[118],"experts":[119],"via":[120],"virtual":[121],"ablation.":[122],"Extensive":[123],"experiments":[124],"TruthfulQA,":[126],"FACTOR,":[127],"TriviaQA":[129],"demonstrate":[130],"improves":[133],"accuracy":[135],"by":[136],"3.1\\%":[137],"average":[139],"without":[140],"increasing":[141],"budget,":[144],"establishing":[145],"superior":[147],"Pareto":[148],"frontier":[149],"compared":[150],"scaling":[153],"strategies.":[154]},"counts_by_year":[],"updated_date":"2026-04-18T06:05:20.339008","created_date":"2026-04-18T00:00:00"}
