{"id":"https://openalex.org/W4414828305","doi":"https://doi.org/10.1109/iccv51701.2025.01606","title":"DreamFuse: Adaptive Image Fusion with Diffusion Transformer","display_name":"DreamFuse: Adaptive Image Fusion with Diffusion Transformer","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4414828305","doi":"https://doi.org/10.1109/iccv51701.2025.01606"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.01606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2504.08291","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069576355","display_name":"Junjia Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junjia Huang","raw_affiliation_strings":["Sun Yat-sen University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077201605","display_name":"Pengxiang Yan","orcid":"https://orcid.org/0000-0002-3075-2903"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pengxiang Yan","raw_affiliation_strings":["ByteDance Intelligent Creation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Intelligent Creation","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071511255","display_name":"Jiyang Liu","orcid":"https://orcid.org/0000-0003-3699-1646"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jiyang Liu","raw_affiliation_strings":["ByteDance Intelligent Creation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Intelligent Creation","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100600528","display_name":"Jie Wu","orcid":"https://orcid.org/0000-0002-3472-1717"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jie Wu","raw_affiliation_strings":["ByteDance Intelligent Creation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Intelligent Creation","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100434571","display_name":"Zhao Wang","orcid":"https://orcid.org/0000-0003-3976-7439"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhao Wang","raw_affiliation_strings":["ByteDance Intelligent Creation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Intelligent Creation","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071923865","display_name":"Y. Wang","orcid":"https://orcid.org/0009-0006-0037-7248"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yitong Wang","raw_affiliation_strings":["ByteDance Intelligent Creation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Intelligent Creation","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100412937","display_name":"Liang Lin","orcid":"https://orcid.org/0000-0003-2248-3755"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Lin","raw_affiliation_strings":["Sun Yat-sen University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042965510","display_name":"Guanbin Li","orcid":"https://orcid.org/0000-0002-4805-0926"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanbin Li","raw_affiliation_strings":["Sun Yat-sen University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36112398,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9782999753952026,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.644599974155426},{"id":"https://openalex.org/keywords/affine-transformation","display_name":"Affine transformation","score":0.5281000137329102},{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.4708999991416931},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.461899995803833},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4253999888896942},{"id":"https://openalex.org/keywords/fusion-mechanism","display_name":"Fusion mechanism","score":0.39320001006126404},{"id":"https://openalex.org/keywords/position","display_name":"Position (finance)","score":0.38350000977516174}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7477999925613403},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.644599974155426},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5945000052452087},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5515999794006348},{"id":"https://openalex.org/C92757383","wikidata":"https://www.wikidata.org/wiki/Q382497","display_name":"Affine transformation","level":2,"score":0.5281000137329102},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.4708999991416931},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.461899995803833},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4253999888896942},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.39320001006126404},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.38350000977516174},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3481999933719635},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.31299999356269836},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C166704113","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image registration","level":3,"score":0.29010000824928284},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.28299999237060547}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.01606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2504.08291","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.08291","pdf_url":"https://arxiv.org/pdf/2504.08291","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2504.08291","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2504.08291","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2504.08291","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.08291","pdf_url":"https://arxiv.org/pdf/2504.08291","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1437690383","display_name":null,"funder_award_id":"2024YFB3908503","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G4751043524","display_name":null,"funder_award_id":"62322608","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Image":[0],"fusion":[1,30,75,166],"seeks":[2],"to":[3,41,73,105,124,154,169],"seamlessly":[4],"integrate":[5],"foreground":[6,40,114,132,161],"objects":[7,23],"with":[8,45,69,112],"background":[9,47,116,158],"scenes,":[10],"producing":[11],"realistic":[12],"and":[13,28,80,86,108,115,128,160],"harmonious":[14,109,165],"fused":[15,110,175],"images.":[16],"Unlike":[17],"existing":[18],"methods":[19],"that":[20,180],"directly":[21],"insert":[22],"into":[24,133],"the":[25,39,46,100,126,131,134,174],"background,":[26,135],"adaptive":[27],"interactive":[29],"remains":[31],"a":[32,95,120],"challenging":[33],"yet":[34],"appealing":[35],"task.":[36],"It":[37],"requires":[38],"adjust":[42],"or":[43],"interact":[44],"context,":[48],"enabling":[49,136],"more":[50],"coherent":[51],"integration.":[52],"To":[53],"address":[54],"this,":[55,91],"we":[56,92,144],"propose":[57],"an":[58],"iterative":[59],"human-in-the-loop":[60],"data":[61,68],"generation":[62],"pipeline,":[63],"which":[64],"leverages":[65],"limited":[66],"initial":[67],"diverse":[70],"textual":[71],"prompts":[72],"generate":[74,106],"datasets":[76],"across":[77,186],"various":[78],"scenarios":[79],"interactions,":[81],"including":[82],"placement,":[83],"holding,":[84],"wearing,":[85],"style":[87],"transfer.":[88],"Building":[89],"on":[90,99],"introduce":[93],"DreamFuse,":[94,156],"novel":[96],"approach":[97],"based":[98],"Diffusion":[101],"Transformer":[102],"(DiT)":[103],"model,":[104],"consistent":[107],"images":[111],"both":[113],"information.":[117],"DreamFuse":[118,163],"employs":[119],"Positional":[121],"Affine":[122],"mechanism":[123],"inject":[125],"size":[127],"position":[129],"of":[130,173],"effective":[137],"foreground-background":[138],"interaction":[139],"through":[140],"shared":[141],"attention.":[142],"Furthermore,":[143],"apply":[145],"Localized":[146],"Direct":[147],"Preference":[148],"Optimization":[149],"guided":[150],"by":[151],"human":[152],"feedback":[153],"refine":[155],"enhancing":[157],"consistency":[159],"harmony.":[162],"achieves":[164],"while":[167],"generalizing":[168],"text-driven":[170],"attribute":[171],"editing":[172],"results.":[176],"Experimental":[177],"results":[178],"demonstrate":[179],"our":[181],"method":[182],"outperforms":[183],"state-of-the-art":[184],"approaches":[185],"multiple":[187],"metrics.":[188]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
