{"id":"https://openalex.org/W4399418400","doi":"https://doi.org/10.1145/3652583.3657616","title":"DiffHarmony: Latent Diffusion Model Meets Image Harmonization","display_name":"DiffHarmony: Latent Diffusion Model Meets Image Harmonization","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4399418400","doi":"https://doi.org/10.1145/3652583.3657616"},"language":"en","primary_location":{"id":"doi:10.1145/3652583.3657616","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3657616","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3657616","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3657616","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002352323","display_name":"Pengfei Zhou","orcid":"https://orcid.org/0009-0008-8469-1123"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pengfei Zhou","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013283012","display_name":"Fangxiang Feng","orcid":"https://orcid.org/0000-0002-4798-4233"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangxiang Feng","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100351304","display_name":"Xiaojie Wang","orcid":"https://orcid.org/0000-0003-0314-8951"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojie Wang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5002352323"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":1.0526,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.76925089,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1130","last_page":"1134"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7997516393661499},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6484980583190918},{"id":"https://openalex.org/keywords/harmonization","display_name":"Harmonization","score":0.5507508516311646},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5383867025375366},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5272337794303894},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.518751859664917},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.48307183384895325},{"id":"https://openalex.org/keywords/image-registration","display_name":"Image registration","score":0.4679388403892517},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.45401516556739807},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4374169707298279},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3447648286819458}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7997516393661499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6484980583190918},{"id":"https://openalex.org/C2779962950","wikidata":"https://www.wikidata.org/wiki/Q5659376","display_name":"Harmonization","level":2,"score":0.5507508516311646},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5383867025375366},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5272337794303894},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.518751859664917},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.48307183384895325},{"id":"https://openalex.org/C166704113","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image registration","level":3,"score":0.4679388403892517},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.45401516556739807},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4374169707298279},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3447648286819458},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3652583.3657616","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3657616","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3657616","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3652583.3657616","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3657616","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3657616","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399418400.pdf","grobid_xml":"https://content.openalex.org/works/W4399418400.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W2129069237","https://openalex.org/W2165633874","https://openalex.org/W2190424233","https://openalex.org/W2960974111","https://openalex.org/W2962737447","https://openalex.org/W3008540959","https://openalex.org/W3034684802","https://openalex.org/W3036167779","https://openalex.org/W3120025849","https://openalex.org/W3155072588","https://openalex.org/W3162926177","https://openalex.org/W3169823210","https://openalex.org/W3171358896","https://openalex.org/W3171714858","https://openalex.org/W3177150392","https://openalex.org/W3194909440","https://openalex.org/W3212516020","https://openalex.org/W4214738329","https://openalex.org/W4225322336","https://openalex.org/W4281969232","https://openalex.org/W4304098227","https://openalex.org/W4312933868","https://openalex.org/W4321177576","https://openalex.org/W4360594859","https://openalex.org/W4381552819","https://openalex.org/W4385681234","https://openalex.org/W4387968695","https://openalex.org/W4390873054","https://openalex.org/W4392904768","https://openalex.org/W6779823529","https://openalex.org/W6795288823","https://openalex.org/W6804122398","https://openalex.org/W6840815571","https://openalex.org/W6854019362","https://openalex.org/W6859818547"],"related_works":["https://openalex.org/W2006073222","https://openalex.org/W2488916264","https://openalex.org/W2323573032","https://openalex.org/W198625436","https://openalex.org/W1908077024","https://openalex.org/W2095388346","https://openalex.org/W2379751185","https://openalex.org/W2379404183","https://openalex.org/W2004509987","https://openalex.org/W3145231964"],"abstract_inverted_index":{"Image":[0],"harmonization,":[1],"which":[2],"involves":[3],"adjusting":[4],"the":[5,18,33,58,63,96,102,128,131,141],"foreground":[6],"of":[7,36,130,143],"a":[8,13,90],"composite":[9],"image":[10,64,71,97],"to":[11,95,100,125],"attain":[12],"unified":[14],"visual":[15],"consistency":[16],"with":[17,57,81],"background,":[19],"can":[20],"be":[21],"conceptualized":[22],"as":[23],"an":[24,121],"image-to-image":[25,37],"translation":[26,38],"task.":[27],"Diffusion":[28],"models":[29,44,54],"have":[30],"recently":[31],"promoted":[32],"rapid":[34],"development":[35],"tasks":[39,73],".":[40],"However,":[41],"training":[42],"diffusion":[43,53,93],"from":[45],"scratch":[46],"is":[47,149],"computationally":[48],"intensive.":[49],"Fine-tuning":[50],"pre-trained":[51,91],"latent":[52,92],"entails":[55],"dealing":[56],"reconstruction":[59],"error":[60],"induced":[61],"by":[62],"compression":[65],"autoencoder,":[66],"making":[67],"it":[68],"unsuitable":[69],"for":[70],"generation":[72],"that":[74],"involve":[75],"pixel-level":[76],"evaluation":[77],"metrics.":[78],"To":[79],"deal":[80],"these":[82],"issues,":[83],"in":[84],"this":[85],"paper,":[86],"we":[87,110],"first":[88],"adapt":[89],"model":[94],"harmonization":[98],"task":[99],"generate":[101],"harmonious":[103],"but":[104],"potentially":[105],"blurry":[106],"initial":[107],"images.":[108,134],"Then":[109],"implement":[111],"two":[112],"strategies:":[113],"utilizing":[114],"higher-resolution":[115],"images":[116],"during":[117],"inference":[118],"and":[119],"incorporating":[120],"additional":[122],"refinement":[123],"stage,":[124],"further":[126],"enhance":[127],"clarity":[129],"initially":[132],"harmonized":[133],"Extensive":[135],"experiments":[136],"on":[137],"iHarmony4":[138],"datasets":[139],"demonstrate":[140],"superiority":[142],"our":[144],"proposed":[145],"method.":[146],"The":[147],"code":[148],"available":[150],"at":[151],"\\hrefhttps://github.com/nicecv/DiffHarmony":[152],"https://github.com/nicecv/DiffHarmony.":[153]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
