{"id":"https://openalex.org/W4406982886","doi":"https://doi.org/10.1109/tmm.2025.3535343","title":"Zero-Shot Image Harmonization With Generative Model Prior","display_name":"Zero-Shot Image Harmonization With Generative Model Prior","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4406982886","doi":"https://doi.org/10.1109/tmm.2025.3535343"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2025.3535343","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3535343","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032819369","display_name":"Jianqi Chen","orcid":"https://orcid.org/0000-0003-0031-8417"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianqi Chen","raw_affiliation_strings":["Image Processing Center, School of Astronautics, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Image Processing Center, School of Astronautics, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005874473","display_name":"Yilan Zhang","orcid":"https://orcid.org/0000-0003-3633-7038"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yilan Zhang","raw_affiliation_strings":["Image Processing Center, School of Astronautics, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Image Processing Center, School of Astronautics, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088611151","display_name":"Zhengxia Zou","orcid":"https://orcid.org/0000-0003-1774-552X"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengxia Zou","raw_affiliation_strings":["Department of Guidance, Navigation and Control, School of Astronautics, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Guidance, Navigation and Control, School of Astronautics, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101518544","display_name":"Keyan Chen","orcid":"https://orcid.org/0000-0003-0483-1306"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Keyan Chen","raw_affiliation_strings":["Image Processing Center, School of Astronautics, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Image Processing Center, School of Astronautics, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058849690","display_name":"Zhenwei Shi","orcid":"https://orcid.org/0000-0002-4772-3172"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenwei Shi","raw_affiliation_strings":["Image Processing Center, School of Astronautics, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Image Processing Center, School of Astronautics, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5032819369"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":1.2784,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.76538422,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"27","issue":null,"first_page":"4494","last_page":"4507"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7113954424858093},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.5919345021247864},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.587846040725708},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4986710548400879},{"id":"https://openalex.org/keywords/harmonization","display_name":"Harmonization","score":0.47959959506988525},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4578602910041809},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.44870510697364807},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4426341652870178}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7113954424858093},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.5919345021247864},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.587846040725708},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4986710548400879},{"id":"https://openalex.org/C2779962950","wikidata":"https://www.wikidata.org/wiki/Q5659376","display_name":"Harmonization","level":2,"score":0.47959959506988525},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4578602910041809},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.44870510697364807},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4426341652870178},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3535343","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3535343","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7969261120","display_name":null,"funder_award_id":"623B2013","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8062672451","display_name":null,"funder_award_id":"JL23005","funder_id":"https://openalex.org/F4320322919","funder_display_name":"Natural Science Foundation of Beijing Municipality"},{"id":"https://openalex.org/G8320858330","display_name":null,"funder_award_id":"62125102","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322919","display_name":"Natural Science Foundation of Beijing Municipality","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1997488488","https://openalex.org/W2006957355","https://openalex.org/W2017745767","https://openalex.org/W2036167211","https://openalex.org/W2054978444","https://openalex.org/W2083366168","https://openalex.org/W2099163172","https://openalex.org/W2129112648","https://openalex.org/W2138467836","https://openalex.org/W2141015396","https://openalex.org/W2144502914","https://openalex.org/W2159871358","https://openalex.org/W2164147879","https://openalex.org/W2165633874","https://openalex.org/W2507296351","https://openalex.org/W2519963891","https://openalex.org/W2962737447","https://openalex.org/W2963835354","https://openalex.org/W2982695696","https://openalex.org/W3034684802","https://openalex.org/W3088065502","https://openalex.org/W3120025849","https://openalex.org/W3127541803","https://openalex.org/W3168729576","https://openalex.org/W3171358896","https://openalex.org/W3195577433","https://openalex.org/W3211329537","https://openalex.org/W3216352822","https://openalex.org/W4214774769","https://openalex.org/W4214919319","https://openalex.org/W4224035735","https://openalex.org/W4225322336","https://openalex.org/W4230472795","https://openalex.org/W4281485151","https://openalex.org/W4283388932","https://openalex.org/W4295903508","https://openalex.org/W4311252447","https://openalex.org/W4312234230","https://openalex.org/W4312502952","https://openalex.org/W4312698476","https://openalex.org/W4312926745","https://openalex.org/W4312933868","https://openalex.org/W4312966374","https://openalex.org/W4321021233","https://openalex.org/W4360594859","https://openalex.org/W4386065777","https://openalex.org/W4386075490","https://openalex.org/W4386076532","https://openalex.org/W4386080987","https://openalex.org/W4386083141","https://openalex.org/W4386083151","https://openalex.org/W4387294588","https://openalex.org/W4387609219","https://openalex.org/W4390041933","https://openalex.org/W4402774219","https://openalex.org/W4403422358","https://openalex.org/W6757817989","https://openalex.org/W6783713337","https://openalex.org/W6790978476","https://openalex.org/W6791353385","https://openalex.org/W6795288823","https://openalex.org/W6796242362","https://openalex.org/W6800751262","https://openalex.org/W6809885388","https://openalex.org/W6838639034","https://openalex.org/W6839643428","https://openalex.org/W6840155194","https://openalex.org/W6840815571","https://openalex.org/W6841366371","https://openalex.org/W6841755765","https://openalex.org/W6847118160","https://openalex.org/W6847405979","https://openalex.org/W6855949956","https://openalex.org/W6857614378","https://openalex.org/W6860041859"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2006073222","https://openalex.org/W2488916264","https://openalex.org/W2323573032","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W4411535265","https://openalex.org/W2980279061","https://openalex.org/W2334685461"],"abstract_inverted_index":{"We":[0,101,150],"propose":[1],"a":[2,46,75,96],"zero-shot":[3],"approach":[4,67],"to":[5,9,38,80,128],"image":[6],"harmonization,":[7],"aiming":[8],"overcome":[10],"the":[11,55,84,91,132,145,165],"reliance":[12],"on":[13],"large":[14],"amounts":[15],"of":[16,58,95,108,167],"synthetic":[17],"composite":[18,85],"images":[19],"in":[20,62],"existing":[21],"methods.":[22],"These":[23],"methods,":[24],"while":[25],"showing":[26],"promising":[27],"results,":[28],"involve":[29],"significant":[30],"training":[31],"expenses":[32],"and":[33,64,111,114,158],"often":[34],"struggle":[35],"with":[36,161],"generalization":[37],"unseen":[39],"images.":[40],"To":[41],"this":[42],"end,":[43],"we":[44,73],"introduce":[45],"fully":[47],"modularized":[48],"framework":[49],"inspired":[50],"by":[51],"human":[52,139],"behavior.":[53],"Leveraging":[54],"reasoning":[56],"capabilities":[57],"recent":[59],"foundation":[60],"models":[61],"language":[63],"vision,":[65],"our":[66,168],"comprises":[68],"three":[69],"main":[70],"stages.":[71],"Initially,":[72],"employ":[74,112],"pretrained":[76],"vision-language":[77],"model":[78,99],"(VLM)":[79],"generate":[81],"descriptions":[82,89],"for":[83,105,117,147],"image.":[86],"Subsequently,":[87],"these":[88],"guide":[90],"foreground":[92],"harmonization":[93,122,133],"direction":[94],"text-to-image":[97],"generative":[98],"(T2I).":[100],"refine":[102],"text":[103],"embeddings":[104],"enhanced":[106],"representation":[107],"imaging":[109],"conditions":[110],"self-attention":[113],"edge":[115],"maps":[116],"structure":[118],"preservation.":[119],"Following":[120],"each":[121],"iteration,":[123],"an":[124],"evaluator":[125],"determines":[126],"whether":[127],"conclude":[129],"or":[130],"modify":[131],"direction.":[134],"The":[135],"resulting":[136],"framework,":[137],"mirroring":[138],"behavior,":[140],"achieves":[141],"harmonious":[142],"results":[143,154],"without":[144],"need":[146],"extensive":[148],"training.":[149],"present":[151],"compelling":[152],"visual":[153],"across":[155],"diverse":[156],"scenes":[157],"objects,":[159],"along":[160],"quantitative":[162],"comparisons":[163],"validating":[164],"effectiveness":[166],"approach.":[169]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
