{"id":"https://openalex.org/W7140317497","doi":"https://doi.org/10.48550/arxiv.2603.23272","title":"Multi-Modal Image Fusion via Intervention-Stable Feature Learning","display_name":"Multi-Modal Image Fusion via Intervention-Stable Feature Learning","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7140317497","doi":"https://doi.org/10.48550/arxiv.2603.23272"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.23272","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23272","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.23272","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130629597","display_name":"Xue Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Xue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130561832","display_name":"Zheng Guan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guan, Zheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130555120","display_name":"Wenhua Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Wenhua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130583944","display_name":"Chengchao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Chengchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5049322661","display_name":"Runzhuo Ma","orcid":"https://orcid.org/0000-0003-2864-8725"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Runzhuo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5130629597"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.909500002861023,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.909500002861023,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.02239999920129776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.009399999864399433,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.8233000040054321},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5473999977111816},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.48590001463890076},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.4207000136375427},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.4104999899864197},{"id":"https://openalex.org/keywords/dropout","display_name":"Dropout (neural networks)","score":0.40689998865127563},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.39169999957084656},{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.3504999876022339},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.34860000014305115}],"concepts":[{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.8233000040054321},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6541000008583069},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6516000032424927},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5473999977111816},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48590001463890076},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.4207000136375427},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4104999899864197},{"id":"https://openalex.org/C2776145597","wikidata":"https://www.wikidata.org/wiki/Q25339462","display_name":"Dropout (neural networks)","level":2,"score":0.40689998865127563},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.39169999957084656},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3698999881744385},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.3504999876022339},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.34860000014305115},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3474999964237213},{"id":"https://openalex.org/C45340560","wikidata":"https://www.wikidata.org/wiki/Q215382","display_name":"Disjoint sets","level":2,"score":0.3303999900817871},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.32850000262260437},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3248000144958496},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.3172000050544739},{"id":"https://openalex.org/C79772020","wikidata":"https://www.wikidata.org/wiki/Q5159264","display_name":"Conditional independence","level":2,"score":0.3118000030517578},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C184297639","wikidata":"https://www.wikidata.org/wiki/Q177765","display_name":"Biometrics","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.28130000829696655},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C79518650","wikidata":"https://www.wikidata.org/wiki/Q2081431","display_name":"Integrator","level":3,"score":0.25870001316070557},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.25780001282691956},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2524999976158142},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.23272","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23272","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.23272","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23272","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.5396844744682312}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multi-modal":[0],"image":[1],"fusion":[2],"integrates":[3],"complementary":[4,68],"information":[5],"from":[6,50],"different":[7,62,133],"modalities":[8,76],"into":[9],"a":[10,117],"unified":[11],"representation.":[12],"Current":[13],"methods":[14],"predominantly":[15],"optimize":[16],"statistical":[17],"correlations":[18],"between":[19],"modalities,":[20],"often":[21],"capturing":[22,141],"dataset-induced":[23],"spurious":[24,147],"associations":[25],"that":[26,94,122,152],"degrade":[27],"under":[28,97],"distribution":[29],"shifts.":[30],"In":[31],"this":[32],"paper,":[33],"we":[34,54,115],"propose":[35],"an":[36],"intervention-based":[37],"framework":[38],"inspired":[39],"by":[40],"causal":[41,52],"principles":[42],"to":[43,60,124],"identify":[44,125],"robust":[45,142],"cross-modal":[46],"dependencies.":[47],"Drawing":[48],"insights":[49],"Pearl's":[51],"hierarchy,":[53],"design":[55],"three":[56],"principled":[57],"intervention":[58],"strategies":[59],"probe":[61],"aspects":[63],"of":[64,88,108],"modal":[65,143],"relationships:":[66],"i)":[67],"masking":[69,87],"with":[70],"spatially":[71],"disjoint":[72],"perturbations":[73],"tests":[74],"whether":[75],"can":[77],"genuinely":[78],"compensate":[79],"for":[80],"each":[81,109],"other's":[82],"missing":[83],"information,":[84],"ii)":[85],"random":[86],"identical":[89],"regions":[90],"identifies":[91],"feature":[92],"subsets":[93],"remain":[95],"informative":[96],"partial":[98],"observability,":[99],"and":[100,126,162],"iii)":[101],"modality":[102],"dropout":[103],"evaluates":[104],"the":[105],"irreplaceable":[106],"contribution":[107],"modality.":[110],"Based":[111],"on":[112,158],"these":[113],"interventions,":[114],"introduce":[116],"Causal":[118],"Feature":[119],"Integrator":[120],"(CFI)":[121],"learns":[123],"prioritize":[127],"intervention-stable":[128],"features":[129],"maintaining":[130],"importance":[131],"across":[132],"perturbation":[134],"patterns":[135],"through":[136],"adaptive":[137],"invariance":[138],"gating,":[139],"thereby":[140],"dependencies":[144],"rather":[145],"than":[146],"correlations.":[148],"Extensive":[149],"experiments":[150],"demonstrate":[151],"our":[153],"method":[154],"achieves":[155],"SOTA":[156],"performance":[157],"both":[159],"public":[160],"benchmarks":[161],"downstream":[163],"high-level":[164],"vision":[165],"tasks.":[166]},"counts_by_year":[],"updated_date":"2026-03-26T06:10:45.909354","created_date":"2026-03-26T00:00:00"}
