{"id":"https://openalex.org/W4403791899","doi":"https://doi.org/10.1145/3664647.3680729","title":"Equilibrated Diffusion: Frequency-aware Textual Embedding for Equilibrated Image Customization","display_name":"Equilibrated Diffusion: Frequency-aware Textual Embedding for Equilibrated Image Customization","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791899","doi":"https://doi.org/10.1145/3664647.3680729"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3680729","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680729","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101796272","display_name":"Liyuan Ma","orcid":"https://orcid.org/0000-0002-9492-5324"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liyuan Ma","raw_affiliation_strings":["Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103152458","display_name":"Xueji Fang","orcid":"https://orcid.org/0000-0002-7993-5188"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueji Fang","raw_affiliation_strings":["Zhejiang University &amp; Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University &amp; Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985","https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100766907","display_name":"Guo-Jun Qi","orcid":"https://orcid.org/0000-0003-3508-1851"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guo-Jun Qi","raw_affiliation_strings":["Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101796272"],"corresponding_institution_ids":["https://openalex.org/I3133055985"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19253382,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"196","last_page":"204"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6399168968200684},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.6380451917648315},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5884290933609009},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.5281327366828918},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.49626976251602173},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.368941992521286},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3593882918357849},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33694717288017273},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.21946081519126892},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1379261314868927}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6399168968200684},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.6380451917648315},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5884290933609009},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.5281327366828918},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.49626976251602173},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.368941992521286},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3593882918357849},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33694717288017273},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.21946081519126892},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1379261314868927},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3680729","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680729","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W3159481202","https://openalex.org/W3203605797","https://openalex.org/W4212774754","https://openalex.org/W4308558335","https://openalex.org/W4312933868","https://openalex.org/W4312985394","https://openalex.org/W4385271055","https://openalex.org/W4385535331","https://openalex.org/W4385537492","https://openalex.org/W4385800625","https://openalex.org/W4386072096","https://openalex.org/W4386076425","https://openalex.org/W4386083151","https://openalex.org/W4389347744","https://openalex.org/W4389539271","https://openalex.org/W4390872341","https://openalex.org/W4390873195","https://openalex.org/W4390874393","https://openalex.org/W4393158734","https://openalex.org/W6838697126"],"related_works":["https://openalex.org/W2109940557","https://openalex.org/W2466832359","https://openalex.org/W2081900870","https://openalex.org/W4391210591","https://openalex.org/W1582019636","https://openalex.org/W1499005795","https://openalex.org/W3172493050","https://openalex.org/W4385420271","https://openalex.org/W2141531133","https://openalex.org/W2312145515"],"abstract_inverted_index":{"Image":[0],"customization":[1,79,182],"involves":[2],"learning":[3],"the":[4,46,55,116,132,141,144,179,196,217,249],"subject":[5,157,192,239],"from":[6,85,115],"provided":[7],"concept":[8,43,83,129,138],"images":[9],"and":[10,62,92,104,120,126],"generating":[11],"it":[12],"within":[13],"textual":[14,91],"contexts,":[15],"typically":[16],"yielding":[17],"alterations":[18],"of":[19,57,152,198,220,251],"attributes":[20,59,84],"such":[21],"as":[22],"style":[23,61,125,153],"or":[24],"background.":[25],"Prevailing":[26],"methods":[27],"primarily":[28],"rely":[29],"on":[30],"fine-tuning":[31],"technique,":[32],"wherein":[33],"a":[34,86,100,187],"unified":[35],"latent":[36,102],"embedding":[37,103],"is":[38,214,256],"employed":[39],"to":[40,53,107,156,194,244],"characterize":[41],"various":[42],"attributes.":[44],"However,":[45],"attribute":[47,154],"entanglement":[48],"renders":[49],"customized":[50],"result":[51],"challenging":[52],"mitigate":[54],"influence":[56,197],"subject-irrelevant":[58],"(e.g.,":[60],"background).":[63],"To":[64,205],"overcome":[65],"these":[66],"issues,":[67],"we":[68],"propose":[69],"Equilibrated":[70,111,232],"Diffusion,":[71],"an":[72],"innovative":[73],"method":[74],"that":[75,98,231],"achieves":[76],"equilibrated":[77],"image":[78,124],"by":[80,171,191],"decoupling":[81],"entangled":[82],"frequency-aware":[87],"perspective,":[88],"thus":[89,247],"harmonizing":[90],"visual":[93],"consistency.":[94],"Unlike":[95],"conventional":[96],"approaches":[97],"employ":[99],"shared":[101],"tuning":[105],"process":[106,189],"learn":[108],"concept,":[109],"our":[110,176,252],"Diffusion":[112,233],"draws":[113],"inspiration":[114],"correlation":[117],"between":[118],"high-":[119],"low-frequency":[121],"components":[122],"with":[123,237],"content,":[127],"decomposing":[128],"accordingly":[130],"in":[131,140],"frequency":[133,142,174],"domain.":[134],"Through":[135],"independently":[136],"optimizing":[137],"embeddings":[139],"domain,":[143],"denoising":[145,218],"model":[146,177,219],"not":[147],"only":[148],"enriches":[149],"its":[150,163],"comprehension":[151],"irrelevant":[155],"identity":[158],"but":[159],"also":[160],"inherently":[161],"augments":[162],"aptitude":[164],"for":[165],"accommodating":[166],"novel":[167],"stylized":[168],"descriptions.":[169],"Furthermore,":[170],"combining":[172],"different":[173],"embeddings,":[175],"retains":[178],"spatially":[180],"original":[181],"capability.":[183],"We":[184],"further":[185],"design":[186],"diffusion":[188],"guided":[190],"masks":[193],"alleviate":[195],"background":[199],"attribute,":[200],"thereby":[201],"strengthening":[202],"text":[203,245],"alignment.":[204],"ensure":[206],"subject-related":[207],"information":[208],"consistency,":[209],"Residual":[210],"Reference":[211],"Attention":[212],"(RRA)":[213],"incorporated":[215],"into":[216],"spatial":[221],"attention":[222],"computation,":[223],"effectively":[224],"preserving":[225],"structural":[226],"details.":[227],"Experimental":[228],"results":[229],"demonstrate":[230],"surpasses":[234],"other":[235],"competitors":[236],"better":[238],"consistency":[240],"while":[241],"closely":[242],"adhering":[243],"descriptions,":[246],"validating":[248],"superiority":[250],"approach.":[253],"The":[254],"code":[255],"available":[257],"at":[258],"https://github.com/maple-research-lab/EqDiff.":[259]},"counts_by_year":[],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
