{"id":"https://openalex.org/W7154993113","doi":"https://doi.org/10.48550/arxiv.2604.16264","title":"Information Router for Mitigating Modality Dominance in Vision-Language Models","display_name":"Information Router for Mitigating Modality Dominance in Vision-Language Models","publication_year":2026,"publication_date":"2026-04-17","ids":{"openalex":"https://openalex.org/W7154993113","doi":"https://doi.org/10.48550/arxiv.2604.16264"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.16264","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16264","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.16264","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100612922","display_name":"Seulgi Kim","orcid":"https://orcid.org/0000-0002-8015-7379"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Seulgi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010958338","display_name":"Mohit Prabhushankar","orcid":"https://orcid.org/0000-0002-8743-7058"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prabhushankar, Mohit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134058646","display_name":"Ghassan AlRegib","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"AlRegib, Ghassan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9463000297546387,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9463000297546387,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.005400000140070915,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.005100000184029341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.73580002784729},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5971999764442444},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5480999946594238},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.453900009393692},{"id":"https://openalex.org/keywords/dominance","display_name":"Dominance (genetics)","score":0.3986000120639801},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3407000005245209},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3237999975681305}],"concepts":[{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.73580002784729},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.680899977684021},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5971999764442444},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5480999946594238},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.498199999332428},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.453900009393692},{"id":"https://openalex.org/C151913843","wikidata":"https://www.wikidata.org/wiki/Q3454555","display_name":"Dominance (genetics)","level":3,"score":0.3986000120639801},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3407000005245209},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3237999975681305},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3208000063896179},{"id":"https://openalex.org/C52622258","wikidata":"https://www.wikidata.org/wiki/Q131222","display_name":"Information theory","level":2,"score":0.31709998846054077},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C87868495","wikidata":"https://www.wikidata.org/wiki/Q750843","display_name":"Information processing","level":2,"score":0.2872999906539917},{"id":"https://openalex.org/C2982962833","wikidata":"https://www.wikidata.org/wiki/Q17092450","display_name":"Information fusion","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2718000113964081},{"id":"https://openalex.org/C3018121129","wikidata":"https://www.wikidata.org/wiki/Q2122243","display_name":"Negative information","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C3020402766","wikidata":"https://www.wikidata.org/wiki/Q104376712","display_name":"Prior information","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.16264","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16264","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.16264","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16264","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision":[0],"Language":[1],"models":[2],"(VLMs)":[3],"have":[4],"demonstrated":[5],"strong":[6],"performance":[7],"across":[8,168],"a":[9,26,126,138],"wide":[10],"range":[11],"of":[12,93],"benchmarks,":[13],"yet":[14],"they":[15,134],"often":[16,71],"suffer":[17],"from":[18,125],"modality":[19,151,156,181,192,210],"dominance,":[20,152],"where":[21,52],"predictions":[22],"rely":[23],"disproportionately":[24],"on":[25,162],"single":[27],"modality.":[28],"Prior":[29],"approaches":[30],"primarily":[31],"address":[32],"this":[33,96],"issue":[34],"by":[35,137],"steering":[36],"model's":[37,85],"attention":[38,49,86],"allocation,":[39],"implicitly":[40],"assuming":[41],"that":[42,60,108,175,197],"all":[43],"modalities":[44,70],"provide":[45],"sufficient":[46],"information.":[47,94],"However,":[48],"only":[50],"determines":[51],"the":[53,66,90],"model":[54,170],"focuses,":[55],"and":[56,76,121,183,186,205],"cannot":[57],"enrich":[58],"information":[59,74,111,124,144,201],"is":[61,157,202],"missing":[62],"or":[63],"ambiguous.":[64],"In":[65,80,95],"real":[67],"world,":[68],"input":[69],"differ":[72],"in":[73,150,212],"density":[75],"their":[77],"signal-to-noise":[78],"ratios.":[79],"such":[81],"cases,":[82],"simply":[83],"adjusting":[84],"does":[87],"not":[88],"resolve":[89],"underlying":[91],"lack":[92],"paper,":[97],"we":[98],"propose":[99],"\\textsc{MoIR}:":[100],"\\textit{Multi-modal":[101],"Information":[102],"Router},":[103],"an":[104,203],"information-level":[105],"fusion":[106],"method":[107],"explicitly":[109,198],"reduces":[110],"disparity":[112],"prior":[113],"to":[114],"fusion.":[115],"\\textsc{MoIR}":[116,146,161,176],"identifies":[117],"less":[118],"informative":[119],"tokens":[120],"routes":[122],"complementary":[123,206],"stronger":[127],"modality,":[128],"constructing":[129],"information-dense":[130],"token":[131],"representations":[132],"before":[133],"are":[135],"processed":[136],"large":[139],"language":[140],"model.":[141],"By":[142],"modifying":[143,199],"availability,":[145],"enables":[147],"reliable":[148],"shifts":[149],"even":[153,190],"when":[154],"one":[155],"degraded.":[158],"We":[159],"evaluate":[160],"three":[163],"widely":[164],"used":[165],"multi-modal":[166,213],"benchmarks":[167],"multiple":[169],"backbones.":[171],"Experimental":[172],"results":[173],"show":[174],"consistently":[177],"demonstrates":[178],"more":[179],"balanced":[180],"contribution,":[182],"improves":[184],"robustness":[185],"downstream":[187],"performance,":[188],"particularly":[189],"under":[191],"degradation.":[193],"These":[194],"findings":[195],"demonstrate":[196],"cross-modal":[200],"effective":[204],"strategy":[207],"for":[208],"mitigating":[209],"dominance":[211],"reasoning":[214],"models.":[215]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-21T00:00:00"}
