{"id":"https://openalex.org/W4414360626","doi":"https://doi.org/10.24963/ijcai.2025/209","title":"CMFS: CLIP-Guided Modality Interaction for Mitigating Noise in Multi-Modal Image Fusion and Segmentation","display_name":"CMFS: CLIP-Guided Modality Interaction for Mitigating Noise in Multi-Modal Image Fusion and Segmentation","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414360626","doi":"https://doi.org/10.24963/ijcai.2025/209"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/209","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/209","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024756532","display_name":"Guocheng Su","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guilin Su","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051073531","display_name":"Yuqing Huang","orcid":"https://orcid.org/0000-0003-4583-6365"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuqing Huang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen","Peng Cheng Laboratory"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Peng Cheng Laboratory","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060061835","display_name":"Chao Yang","orcid":"https://orcid.org/0000-0003-1103-0056"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Yang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100740564","display_name":"Zhenyu He","orcid":"https://orcid.org/0000-0002-2546-8721"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenyu He","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5024756532"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":1.2803,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86176853,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1873","last_page":"1881"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.9611999988555908,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.9577999711036682,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6977999806404114},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6468999981880188},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5809000134468079},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5649999976158142},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5515999794006348},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.545199990272522},{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.503000020980835},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4772000014781952},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.44589999318122864}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7486000061035156},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6977999806404114},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6819999814033508},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6468999981880188},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5809000134468079},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5649999976158142},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5515999794006348},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5483999848365784},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.545199990272522},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.503000020980835},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4772000014781952},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.44589999318122864},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4327999949455261},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4275999963283539},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4011000096797943},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3864000141620636},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.336899995803833},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.33169999718666077},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.319599986076355},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.30410000681877136},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.2906000018119812},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.26739999651908875},{"id":"https://openalex.org/C35772409","wikidata":"https://www.wikidata.org/wiki/Q1323086","display_name":"Image noise","level":3,"score":0.2671000063419342},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/209","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/209","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Infrared-visible":[0],"image":[1,59,162],"fusion":[2,35,60,139,163],"and":[3,30,37,61,87,112,116,123,140,165],"semantic":[4,166],"segmentation":[5,38,167],"are":[6],"pivotal":[7],"tasks":[8],"for":[9],"robust":[10,85],"scene":[11],"understanding":[12],"under":[13],"challenging":[14],"conditions":[15],"such":[16],"as":[17],"low":[18],"light.":[19],"However,":[20],"existing":[21],"methods":[22],"often":[23],"struggle":[24],"with":[25,77,133,152],"high":[26],"noise,":[27],"modality":[28,52],"inconsistencies,":[29],"inefficient":[31],"cross-modal":[32],"interactions,":[33],"limiting":[34],"quality":[36,164],"accuracy.":[39,143],"To":[40],"this":[41],"end,":[42],"we":[43,128],"propose":[44],"CMFS,":[45],"a":[46,66,74,88,105,130],"unified":[47],"framework":[48],"that":[49,72],"leverages":[50],"CLIP-guided":[51],"interaction":[53,94],"to":[54,82,96,119,136],"mitigate":[55],"noise":[56],"in":[57,160,170],"multi-modal":[58,92],"segmentation.":[62],"Our":[63],"approach":[64,157],"features":[65,86,118],"region-aware":[67],"Modal":[68],"Interaction":[69],"Alignment":[70],"module":[71,108],"combines":[73],"VMamba-based":[75],"encoder":[76],"an":[78],"additional":[79],"shuffle":[80],"layer":[81],"obtain":[83],"more":[84],"CLIP-guided,":[89],"regionally":[90],"constrained":[91],"feature":[93,125],"block":[95],"emphasize":[97],"foreground":[98],"targets":[99],"while":[100],"suppressing":[101],"low-light":[102],"noise.":[103],"Additionally,":[104],"Frequency-Spatial":[106],"Collaboration":[107],"uses":[109],"selective":[110],"scanning":[111],"integrates":[113],"wavelet-,":[114],"spatial-,":[115],"Fourier-domain":[117],"achieve":[120],"adaptive":[121],"denoising":[122],"balanced":[124],"allocation.":[126],"Furthermore,":[127],"employ":[129],"low-rank":[131],"mixture-of-experts":[132],"dynamic":[134],"routing":[135],"improve":[137],"region-specific":[138],"enhance":[141],"pixel-level":[142],"Extensive":[144],"experiments":[145],"on":[146],"several":[147],"benchmarks":[148],"show":[149],"that,":[150],"compared":[151],"state-of-the-art":[153],"methods,":[154],"the":[155],"proposed":[156],"demonstrates":[158],"effectiveness":[159],"both":[161],"accuracy,":[168],"especially":[169],"complex":[171],"environments.":[172],"The":[173],"source":[174],"code":[175],"will":[176],"be":[177],"released":[178],"at":[179],"IJCAI2025-CMFS.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
