{"id":"https://openalex.org/W7127323913","doi":"https://doi.org/10.48550/arxiv.2602.01990","title":"SAME: Stabilized Mixture-of-Experts for Multimodal Continual Instruction Tuning","display_name":"SAME: Stabilized Mixture-of-Experts for Multimodal Continual Instruction Tuning","publication_year":2026,"publication_date":"2026-02-02","ids":{"openalex":"https://openalex.org/W7127323913","doi":"https://doi.org/10.48550/arxiv.2602.01990"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.01990","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01990","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.01990","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124935476","display_name":"Zhen-Hao Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xie, Zhen-Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124898394","display_name":"Jun-Tao Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Jun-Tao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000905854","display_name":"Yu-Cheng Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Yu-Cheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124879000","display_name":"Han-Jia Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Han-Jia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124922682","display_name":"De-Chuan Zhan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhan, De-Chuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124888103","display_name":"Da-Wei Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Da-Wei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5124935476"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.31139999628067017,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.31139999628067017,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.18150000274181366,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.09589999914169312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/router","display_name":"Router","score":0.5634999871253967},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5350000262260437},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5266000032424927},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4984000027179718},{"id":"https://openalex.org/keywords/expert-system","display_name":"Expert system","score":0.4943000078201294},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.48559999465942383},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.4047999978065491},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.37119999527931213}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7723000049591064},{"id":"https://openalex.org/C2775896111","wikidata":"https://www.wikidata.org/wiki/Q642560","display_name":"Router","level":2,"score":0.5634999871253967},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5350000262260437},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5266000032424927},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5231000185012817},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4984000027179718},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.4943000078201294},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.48559999465942383},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.474700003862381},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.4047999978065491},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.29750001430511475},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.28700000047683716},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28610000014305115},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.2572999894618988},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.25529998540878296},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C24856439","wikidata":"https://www.wikidata.org/wiki/Q352483","display_name":"Adaptive routing","level":5,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.01990","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01990","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.01990","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01990","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0,22],"Large":[1],"Language":[2],"Models":[3],"(MLLMs)":[4],"achieve":[5],"strong":[6],"performance":[7],"through":[8],"instruction":[9],"tuning,":[10],"but":[11,38],"real-world":[12],"deployment":[13],"requires":[14],"them":[15],"to":[16,34,68,167],"continually":[17],"expand":[18],"their":[19,87],"capabilities,":[20],"making":[21],"Continual":[23],"Instruction":[24],"Tuning":[25],"(MCIT)":[26],"essential.":[27],"Recent":[28],"methods":[29],"leverage":[30],"sparse":[31],"expert":[32,43,98,105,128,144,148,165],"routing":[33,44,132],"promote":[35],"task":[36],"specialization,":[37],"we":[39,115,146],"find":[40],"that":[41,59],"the":[42,50,76],"process":[45],"suffers":[46],"from":[47],"drift":[48],"as":[49],"data":[51],"distribution":[52],"evolves.":[53],"For":[54],"example,":[55],"a":[56,158],"grounding":[57],"query":[58],"previously":[60],"activated":[61],"localization":[62],"experts":[63,70,78,109,170],"may":[64],"instead":[65],"be":[66,80],"routed":[67],"irrelevant":[69],"after":[71],"learning":[72],"OCR":[73],"tasks.":[74,113],"Meanwhile,":[75],"grounding-related":[77],"can":[79],"overwritten":[81,111],"by":[82,130],"new":[83],"tasks":[84],"and":[85,104,137,176],"lose":[86],"original":[88],"functionality.":[89],"Such":[90],"failure":[91],"reflects":[92],"two":[93],"problems:":[94],"router":[95,124],"drift,":[96,106,125,145],"where":[97,107],"selection":[99,129],"becomes":[100],"inconsistent":[101],"over":[102],"time,":[103],"shared":[108],"are":[110],"across":[112],"Therefore,":[114],"propose":[116],"StAbilized":[117],"Mixture-of-Experts":[118],"(SAME)":[119],"for":[120],"MCIT.":[121],"To":[122,142],"address":[123],"SAME":[126,161],"stabilizes":[127],"decomposing":[131],"dynamics":[133],"into":[134],"orthogonal":[135],"subspaces":[136],"updating":[138],"only":[139],"task-relevant":[140],"directions.":[141],"mitigate":[143],"regulate":[147],"updates":[149],"via":[150],"curvature-aware":[151],"scaling":[152],"using":[153],"historical":[154],"input":[155],"covariance":[156],"in":[157],"rehearsal-free":[159],"manner.":[160],"also":[162],"introduces":[163],"adaptive":[164],"activation":[166],"freeze":[168],"selected":[169],"during":[171],"training,":[172],"reducing":[173],"redundant":[174],"computation":[175],"cross-task":[177],"interference.":[178],"Extensive":[179],"experiments":[180],"demonstrate":[181],"its":[182],"SOTA":[183],"performance.":[184]},"counts_by_year":[],"updated_date":"2026-02-04T23:14:21.375766","created_date":"2026-02-04T00:00:00"}
