{"id":"https://openalex.org/W7138035515","doi":"https://doi.org/10.1609/aaai.v40i8.37581","title":"Text-Guided Channel Perturbation and Pre-Trained Knowledge Integration for Unified Multi-Modality Image Fusion","display_name":"Text-Guided Channel Perturbation and Pre-Trained Knowledge Integration for Unified Multi-Modality Image Fusion","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138035515","doi":"https://doi.org/10.1609/aaai.v40i8.37581"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i8.37581","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i8.37581","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i8.37581","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020938449","display_name":"Xilai Li","orcid":"https://orcid.org/0000-0001-9859-8191"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xilai Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129644799","display_name":"Xiaosong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaosong Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5123457786","display_name":"Weijun Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weijun Jiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5020938449"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25373134,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"8","first_page":"6521","last_page":"6529"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.9297000169754028,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.9297000169754028,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.016300000250339508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.009499999694526196,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/affine-transformation","display_name":"Affine transformation","score":0.6855000257492065},{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.6322000026702881},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5458999872207642},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5092999935150146},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.4959999918937683},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.4909000098705292},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4814000129699707},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.43779999017715454}],"concepts":[{"id":"https://openalex.org/C92757383","wikidata":"https://www.wikidata.org/wiki/Q382497","display_name":"Affine transformation","level":2,"score":0.6855000257492065},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.6322000026702881},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6080999970436096},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5458999872207642},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.524399995803833},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5092999935150146},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.4959999918937683},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.4909000098705292},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4814000129699707},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.43779999017715454},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4097000062465668},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4032999873161316},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38679999113082886},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.3691999912261963},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3278999924659729},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.29910001158714294},{"id":"https://openalex.org/C177918212","wikidata":"https://www.wikidata.org/wiki/Q803623","display_name":"Perturbation (astronomy)","level":2,"score":0.29510000348091125},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2897000014781952},{"id":"https://openalex.org/C2221639","wikidata":"https://www.wikidata.org/wiki/Q2877","display_name":"Discrete cosine transform","level":3,"score":0.28600001335144043},{"id":"https://openalex.org/C2778328480","wikidata":"https://www.wikidata.org/wiki/Q1639904","display_name":"Hybrid image","level":3,"score":0.28130000829696655},{"id":"https://openalex.org/C2778971668","wikidata":"https://www.wikidata.org/wiki/Q5510284","display_name":"Fusion rules","level":4,"score":0.2802000045776367},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25040000677108765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i8.37581","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i8.37581","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i8.37581","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i8.37581","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7396248579025269,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multi-modality":[0],"image":[1,20,63,173],"fusion":[2,43,52,64,129,174],"enhances":[3],"scene":[4],"perception":[5,40,96],"by":[6],"combining":[7],"complementary":[8],"information.":[9],"Unified":[10],"models":[11],"aim":[12],"to":[13,37,102,123,131,149],"share":[14],"parameters":[15],"across":[16,50],"modalities":[17],"for":[18],"multi-modality":[19,62,106,172],"fusion,":[21],"but":[22],"large":[23],"modality":[24],"differences":[25],"often":[26],"cause":[27],"gradient":[28],"conflicts,":[29],"limiting":[30],"performance.":[31],"Some":[32],"methods":[33,169],"introduce":[34],"modality-specific":[35,158],"encoders":[36],"enhance":[38,105],"feature":[39,107,134],"and":[41,70,80,104,175],"improve":[42],"quality.":[44],"However,":[45],"this":[46,56],"strategy":[47],"reduces":[48],"generalisation":[49],"different":[51],"tasks.":[53,177],"To":[54,75],"overcome":[55],"limitation,":[57],"we":[58,84,110,139],"propose":[59,85],"a":[60,99,141],"unified":[61],"framework":[65],"based":[66],"on":[67,127,157,170],"channel":[68,152],"perturbation":[69],"pre-trained":[71,100],"knowledge":[72],"integration":[73],"(UP-Fusion).":[74],"suppress":[76],"redundant":[77],"modal":[78,121,136],"information":[79],"emphasize":[81],"key":[82],"features,":[83],"the":[86,94,112,133,151,155,164],"Semantic-Aware":[87],"Channel":[88,143],"Pruning":[89],"Module":[90,116,145],"(SCPM),":[91],"which":[92,118],"leverages":[93],"semantic":[95],"capability":[97],"of":[98],"model":[101],"filter":[103],"channels.":[108,159],"Furthermore,":[109],"proposed":[111,165],"Geometric":[113],"Affine":[114],"Modulation":[115],"(GAM),":[117],"uses":[119],"original":[120],"features":[122,130],"apply":[124,140],"affine":[125],"transformations":[126],"initial":[128],"maintain":[132],"encoder":[135],"discriminability.":[137],"Finally,":[138],"Text-Guided":[142],"Perturbation":[144],"(TCPM)":[146],"during":[147],"decoding":[148],"reshape":[150],"distribution,":[153],"reducing":[154],"dependence":[156],"Extensive":[160],"experiments":[161],"demonstrate":[162],"that":[163],"algorithm":[166],"outperforms":[167],"existing":[168],"both":[171],"downstream":[176]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-18T00:00:00"}
