{"id":"https://openalex.org/W4417248876","doi":"https://doi.org/10.1109/tpami.2025.3642842","title":"Towards Unified Semantic and Controllable Image Fusion: A Diffusion Transformer Approach","display_name":"Towards Unified Semantic and Controllable Image Fusion: A Diffusion Transformer Approach","publication_year":2025,"publication_date":"2025-12-11","ids":{"openalex":"https://openalex.org/W4417248876","doi":"https://doi.org/10.1109/tpami.2025.3642842","pmid":"https://pubmed.ncbi.nlm.nih.gov/41379913"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3642842","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3642842","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100769534","display_name":"Jiayang Li","orcid":"https://orcid.org/0009-0009-4267-6897"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiayang Li","raw_affiliation_strings":["Faculty of Computing, Harbin Institute of Technology, Harbin, China","Faculty of Computing, Harbin Institute of Technology, Harbin"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Faculty of Computing, Harbin Institute of Technology, Harbin","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056841900","display_name":"Chengjie Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114105","display_name":"Tsinghua\u2013Berkeley Shenzhen Institute","ror":"https://ror.org/02hhwwz98","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210114105","https://openalex.org/I95457486","https://openalex.org/I99065089"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengjie Jiang","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I4210114105","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011573110","display_name":"Pengwei Liang","orcid":"https://orcid.org/0000-0003-0173-1385"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junjun Jiang","raw_affiliation_strings":["Faculty of Computing, Harbin Institute of Technology, Harbin, China","Faculty of Computing, Harbin Institute of Technology, Harbin"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Faculty of Computing, Harbin Institute of Technology, Harbin","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Pengwei Liang","orcid":"https://orcid.org/0000-0003-0173-1385"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengwei Liang","raw_affiliation_strings":["Faculty of Computing, Harbin Institute of Technology, Harbin, China","Faculty of Computing, Harbin Institute of Technology, Harbin"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Faculty of Computing, Harbin Institute of Technology, Harbin","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiayi Ma","orcid":"https://orcid.org/0000-0003-3264-3265"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiayi Ma","raw_affiliation_strings":["Electronic Information School, Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Electronic Information School, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"last","author":{"id":null,"display_name":"Liqiang Nie","orcid":"https://orcid.org/0000-0003-1476-0273"},"institutions":[{"id":"https://openalex.org/I158809036","display_name":"Shenzhen Institute of Information Technology","ror":"https://ror.org/03wrf9427","country_code":"CN","type":"education","lineage":["https://openalex.org/I158809036"]},{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liqiang Nie","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology (Shenzhen), Shenzhen, China","School of Computer Science and Technology, HarbinInstitute of Technology (Shenzhen), Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology (Shenzhen), Shenzhen, China","institution_ids":["https://openalex.org/I158809036","https://openalex.org/I204983213"]},{"raw_affiliation_string":"School of Computer Science and Technology, HarbinInstitute of Technology (Shenzhen), Shenzhen, China","institution_ids":["https://openalex.org/I158809036","https://openalex.org/I204983213","https://openalex.org/I4210152380"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100769534"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":1.1372,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86028811,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"48","issue":"4","first_page":"3970","last_page":"3987"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.8878999948501587,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.8878999948501587,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.07360000163316727,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.004600000102072954,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5737000107765198},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.49709999561309814},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.4690999984741211},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.46810001134872437},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.45500001311302185},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.42320001125335693},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.38850000500679016},{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.3799000084400177}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.824400007724762},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5760999917984009},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5737000107765198},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.49709999561309814},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.4690999984741211},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.46810001134872437},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.45500001311302185},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.42320001125335693},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39169999957084656},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.38850000500679016},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.3799000084400177},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3662000000476837},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.35010001063346863},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34049999713897705},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3377000093460083},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.2797999978065491},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.27079999446868896},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.2603999972343445},{"id":"https://openalex.org/C2778971668","wikidata":"https://www.wikidata.org/wiki/Q5510284","display_name":"Fusion rules","level":4,"score":0.2535000145435333}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3642842","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3642842","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:41379913","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41379913","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W2152218325","https://openalex.org/W2603777577","https://openalex.org/W2783573276","https://openalex.org/W2798987894","https://openalex.org/W2912147220","https://openalex.org/W2963787388","https://openalex.org/W2998012573","https://openalex.org/W2998529071","https://openalex.org/W3011768656","https://openalex.org/W3015396480","https://openalex.org/W3030921250","https://openalex.org/W3046194589","https://openalex.org/W3047464731","https://openalex.org/W3083574456","https://openalex.org/W3109494165","https://openalex.org/W3152132512","https://openalex.org/W3158080681","https://openalex.org/W3162209674","https://openalex.org/W3171420223","https://openalex.org/W3180355996","https://openalex.org/W3206748188","https://openalex.org/W3213472242","https://openalex.org/W4206713196","https://openalex.org/W4220893768","https://openalex.org/W4283732315","https://openalex.org/W4304080362","https://openalex.org/W4308310215","https://openalex.org/W4309730479","https://openalex.org/W4312440143","https://openalex.org/W4312594135","https://openalex.org/W4313021454","https://openalex.org/W4379209584","https://openalex.org/W4386071458","https://openalex.org/W4386076504","https://openalex.org/W4386083029","https://openalex.org/W4390727645","https://openalex.org/W4390872297","https://openalex.org/W4390872797","https://openalex.org/W4390873110","https://openalex.org/W4393148388","https://openalex.org/W4396621108","https://openalex.org/W4396782929","https://openalex.org/W4399049957","https://openalex.org/W4401944810","https://openalex.org/W4402715923","https://openalex.org/W4402727881","https://openalex.org/W4402754134","https://openalex.org/W4403792106","https://openalex.org/W4404515348","https://openalex.org/W4404612908","https://openalex.org/W4404936032","https://openalex.org/W4405755299","https://openalex.org/W4407736418","https://openalex.org/W4408355349","https://openalex.org/W4409200564","https://openalex.org/W4410226793","https://openalex.org/W4411949565","https://openalex.org/W4413144420","https://openalex.org/W4413144810","https://openalex.org/W4413147858","https://openalex.org/W4413158163","https://openalex.org/W4415795416","https://openalex.org/W4415797524"],"related_works":[],"abstract_inverted_index":{"Image":[0],"fusion":[1,19,38,56,96,148],"aims":[2],"to":[3,49,145],"blend":[4],"complementary":[5],"information":[6],"from":[7],"diverse":[8],"sensing":[9],"modalities,":[10],"yet":[11],"most":[12],"current":[13],"methods":[14],"lack":[15],"robustness":[16],"in":[17],"complex":[18],"scenarios":[20],"and":[21,52,75,102,109,120,126,131,142],"cannot":[22],"flexibly":[23,45],"accommodate":[24],"user":[25,140],"intent.":[26],"We":[27],"present":[28],"DiTFuse,":[29],"the":[30,67,92],"first":[31],"Diffusion-Transformer":[32],"(DiT)":[33],"framework":[34],"for":[35],"instruction-driven,":[36],"dynamic":[37],"control.":[39],"Guided":[40],"by":[41],"natural-language":[42],"instructions,":[43],"DiTFuse":[44,98],"blends":[46],"multimodal":[47],"content":[48],"enable":[50],"hierarchical":[51],"fine-grained":[53],"control":[54,141],"over":[55],"dynamics.":[57],"The":[58,135],"training":[59],"phase":[60],"employs":[61],"a":[62,112],"multi-degrade-mask-image-modeling":[63],"(M3)":[64],"strategy,":[65],"so":[66],"network":[68],"jointly":[69],"learns":[70],"cross-modal":[71],"alignment,":[72],"modality-invariant":[73],"restoration,":[74],"task-aware":[76],"feature":[77],"selection":[78],"without":[79],"relying":[80],"on":[81,116],"ideal":[82],"reference":[83],"images.":[84],"A":[85],"curated,":[86],"multi-granularity":[87],"instruction":[88],"dataset":[89],"further":[90],"equips":[91],"model":[93,136],"with":[94],"interactive":[95],"capabilities.":[97],"unifies":[99],"infrared-visible,":[100],"multi-focus,":[101],"multi-exposure":[103],"fusion-as":[104],"well":[105],"as":[106],"text-controlled":[107],"refinement":[108],"downstream":[110],"tasks-within":[111],"single":[113],"architecture.":[114],"Experiments":[115],"public":[117],"IVIF,":[118],"MFF,":[119],"MEF":[121],"benchmarks":[122],"confirm":[123],"superior":[124],"quantitative":[125],"qualitative":[127],"performance,":[128],"sharper":[129],"textures,":[130],"better":[132],"semantic":[133],"retention.":[134],"also":[137],"supports":[138],"multi-level":[139],"zero-shot":[143],"generalization":[144],"other":[146],"multiimage":[147],"scenarios,":[149],"including":[150],"instruction-conditioned":[151],"segmentation.":[152]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-11T00:00:00"}
