{"id":"https://openalex.org/W7118920828","doi":"https://doi.org/10.48550/arxiv.2601.01260","title":"MambaFormer: Token-Level Guided Routing Mixture-of-Experts for Accurate and Efficient Clinical Assistance","display_name":"MambaFormer: Token-Level Guided Routing Mixture-of-Experts for Accurate and Efficient Clinical Assistance","publication_year":2026,"publication_date":"2026-01-03","ids":{"openalex":"https://openalex.org/W7118920828","doi":"https://doi.org/10.48550/arxiv.2601.01260"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.01260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.01260","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111309022","display_name":"Hamad Khan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Khan, Hamad","raw_affiliation_strings":["Artificial Intelligence Lab, Department of Computer Systems Engineering, University of Engineering,Applied Sciences"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Lab, Department of Computer Systems Engineering, University of Engineering,Applied Sciences","institution_ids":["https://openalex.org/I4210164862"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083423537","display_name":"Saddam Hussain Khan","orcid":"https://orcid.org/0000-0002-6681-1987"},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Khan, Saddam Hussain","raw_affiliation_strings":["Artificial Intelligence Lab, Department of Computer Systems Engineering, University of Engineering,Applied Sciences"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Lab, Department of Computer Systems Engineering, University of Engineering,Applied Sciences","institution_ids":["https://openalex.org/I4210164862"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5111309022"],"corresponding_institution_ids":["https://openalex.org/I4210164862"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.49570000171661377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.49570000171661377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1526000052690506,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.09960000216960907,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6844000220298767},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5648000240325928},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.48750001192092896},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.45410001277923584},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4424000084400177},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.429500013589859},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.3734000027179718},{"id":"https://openalex.org/keywords/static-routing","display_name":"Static routing","score":0.37279999256134033},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.36809998750686646},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.3643999993801117}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8149999976158142},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6844000220298767},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5648000240325928},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.48750001192092896},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.45750001072883606},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.45410001277923584},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4424000084400177},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.429500013589859},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.3734000027179718},{"id":"https://openalex.org/C204948658","wikidata":"https://www.wikidata.org/wiki/Q1119410","display_name":"Static routing","level":4,"score":0.37279999256134033},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.36809998750686646},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36579999327659607},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.3643999993801117},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.357699990272522},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3553999960422516},{"id":"https://openalex.org/C2775896111","wikidata":"https://www.wikidata.org/wiki/Q642560","display_name":"Router","level":2,"score":0.35100001096725464},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.32829999923706055},{"id":"https://openalex.org/C104954878","wikidata":"https://www.wikidata.org/wiki/Q1648707","display_name":"Routing protocol","level":3,"score":0.3230000138282776},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3093000054359436},{"id":"https://openalex.org/C167822520","wikidata":"https://www.wikidata.org/wiki/Q176452","display_name":"Finite-state machine","level":2,"score":0.30489999055862427},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.3021000027656555},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.3005000054836273},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.29280000925064087},{"id":"https://openalex.org/C184896649","wikidata":"https://www.wikidata.org/wiki/Q290066","display_name":"Routing table","level":4,"score":0.2888000011444092},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.27489998936653137},{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.26589998602867126},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26579999923706055},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.2556999921798706},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C76522221","wikidata":"https://www.wikidata.org/wiki/Q5035396","display_name":"Multipath routing","level":5,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.01260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.01260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,46,80,190],"deployment":[1],"of":[2,23,123],"large":[3],"language":[4],"models":[5,85],"(LLMs)":[6],"in":[7],"real-world":[8],"clinical":[9,44,215],"applications":[10],"is":[11,171],"constrained":[12],"by":[13,119,161],"the":[14,21,120,168,178],"fundamental":[15],"trade-off":[16,136],"between":[17,137],"computational":[18,159],"cost":[19,160],"and":[20,43,83,97,101,129,140,158,182,185,208],"efficiency":[22],"linear-time":[24],"models.":[25],"To":[26],"address":[27],"this,":[28],"we":[29],"propose":[30],"an":[31],"LLM-based":[32],"MambaFormer":[33,47,170,192],"hybrid":[34],"Mixture-of-Experts":[35],"(MoE)":[36],"framework":[37],"for":[38,64,76,174,213],"efficient":[39],"medical":[40,175],"question-answering":[41],"(QA)":[42],"assistance.":[45],"employs":[48],"a":[49,59,70,108,134,144,203,210],"lightweight":[50],"gating":[51],"mechanism":[52],"that":[53],"performs":[54],"token-level":[55,164],"dynamic":[56],"routing":[57,115,154],"to":[58,69,88],"customized":[60,81],"Transformer":[61],"expert":[62,74,156,165],"(ET5)":[63],"short,":[65],"complex":[66],"queries":[67],"or":[68],"State":[71],"Space":[72],"Model":[73],"(EMamba)":[75],"long,":[77],"high-throughput":[78],"sequences.":[79],"EMamba":[82],"ET5":[84],"are":[86,102,117],"tailored":[87],"accommodate":[89],"input":[90],"sequence":[91,95,127],"dimensionality,":[92],"embedding":[93],"structure,":[94],"length,":[96,128],"target-specific":[98],"output":[99],"heads,":[100],"fine-tuned":[103],"through":[104],"transfer":[105],"learning":[106],"on":[107,177],"new,":[109,179],"custom-designed":[110,180],"DentalQA":[111,181],"dataset.":[112],"Moreover,":[113],"intelligent":[114],"decisions":[116],"driven":[118],"contextual":[121],"complexity":[122],"token":[124],"embeddings,":[125],"normalized":[126],"domain-aware":[130],"features,":[131],"thereby":[132],"enforcing":[133],"Pareto-optimal":[135],"inference":[138],"latency":[139,199],"prediction":[141],"accuracy.":[142],"Furthermore,":[143],"novel":[145],"utility-guided":[146],"multi-objective":[147],"loss":[148],"jointly":[149],"optimizes":[150],"decisions,":[151],"router":[152],"parameters,":[153],"behavior,":[155],"utilization,":[157],"adaptively":[162],"regulating":[163],"activation.":[166],"Finally,":[167],"proposed":[169,191],"cross-validated":[172],"(holdout)":[173],"QA":[176],"PubMedQA":[183],"datasets":[184],"compared":[186],"with":[187,197],"state-of-the-art":[188],"techniques.":[189],"outperforms":[193],"(BERTScore":[194],"=":[195],"0.9180)":[196],"ultra-low":[198],"(0.077":[200],"s),":[201],"delivering":[202],"24.4":[204],"speedup":[205],"over":[206],"T5-Large":[207],"establishing":[209],"scalable":[211],"solution":[212],"resource-constrained":[214],"deployment.":[216]},"counts_by_year":[],"updated_date":"2026-01-08T20:10:11.968330","created_date":"2026-01-08T00:00:00"}
