{"id":"https://openalex.org/W4403791544","doi":"https://doi.org/10.1145/3664647.3681380","title":"FBSDiff: Plug-and-Play Frequency Band Substitution of Diffusion Features for Highly Controllable Text-Driven Image Translation","display_name":"FBSDiff: Plug-and-Play Frequency Band Substitution of Diffusion Features for Highly Controllable Text-Driven Image Translation","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791544","doi":"https://doi.org/10.1145/3664647.3681380"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681380","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681380","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017334961","display_name":"Xiang Gao","orcid":"https://orcid.org/0000-0003-3618-043X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiang Gao","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100761525","display_name":"Jiaying Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaying Liu","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5017334961"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.7479,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.73045403,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"4101","last_page":"4109"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9624000191688538,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/substitution","display_name":"Substitution (logic)","score":0.837803840637207},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.6926330327987671},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.6121756434440613},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5515112280845642},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5075923204421997},{"id":"https://openalex.org/keywords/plug-and-play","display_name":"Plug and play","score":0.5018255710601807},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34591251611709595},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.19142121076583862},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.11542466282844543},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07925263047218323}],"concepts":[{"id":"https://openalex.org/C2778220771","wikidata":"https://www.wikidata.org/wiki/Q1522579","display_name":"Substitution (logic)","level":2,"score":0.837803840637207},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.6926330327987671},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.6121756434440613},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5515112280845642},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5075923204421997},{"id":"https://openalex.org/C2780070844","wikidata":"https://www.wikidata.org/wiki/Q857815","display_name":"Plug and play","level":2,"score":0.5018255710601807},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34591251611709595},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.19142121076583862},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.11542466282844543},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07925263047218323},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681380","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681380","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W2603777577","https://openalex.org/W2609675817","https://openalex.org/W2962785568","https://openalex.org/W3036167779","https://openalex.org/W3109957618","https://openalex.org/W3155072588","https://openalex.org/W3162926177","https://openalex.org/W3180355996","https://openalex.org/W3212516020","https://openalex.org/W4230098700","https://openalex.org/W4234552385","https://openalex.org/W4281485151","https://openalex.org/W4281661987","https://openalex.org/W4281872541","https://openalex.org/W4287210973","https://openalex.org/W4292779060","https://openalex.org/W4301409532","https://openalex.org/W4312234230","https://openalex.org/W4312282373","https://openalex.org/W4312740349","https://openalex.org/W4312933868","https://openalex.org/W4385527149","https://openalex.org/W4386057725","https://openalex.org/W4386075993","https://openalex.org/W4386076215","https://openalex.org/W4386076476","https://openalex.org/W4386076532","https://openalex.org/W4386113271","https://openalex.org/W4390872297","https://openalex.org/W4390873054","https://openalex.org/W4390873319","https://openalex.org/W4393148714","https://openalex.org/W4402715897","https://openalex.org/W6778883912","https://openalex.org/W6779823529","https://openalex.org/W6795288823"],"related_works":["https://openalex.org/W2379444625","https://openalex.org/W2393147081","https://openalex.org/W2575869988","https://openalex.org/W4308647020","https://openalex.org/W2370088286","https://openalex.org/W2385568494","https://openalex.org/W3101578490","https://openalex.org/W4213477128","https://openalex.org/W4287300272","https://openalex.org/W3134797699"],"abstract_inverted_index":{"Large-scale":[0],"text-to-image":[1,52,87],"diffusion":[2,89,135],"models":[3,31],"have":[4],"been":[5,43],"a":[6,47,62,67,77,97,120,145,165],"revolutionary":[7],"milestone":[8],"in":[9,96,137,164,213],"the":[10,24,92,138,156,160,184,190,195],"evolution":[11],"of":[12,26,29,134,155,183,194,207],"generative":[13],"AI,":[14],"allowing":[15],"wonderful":[16],"image":[17,49,64,158,186],"generation":[18,118,162],"with":[19,119,130],"natural-language":[20],"text":[21,68],"prompt.":[22],"However,":[23],"issue":[25],"lacking":[27],"controllability":[28],"such":[30],"restricts":[32],"their":[33],"practical":[34],"applicability":[35],"for":[36],"real-life":[37],"content":[38],"creation.":[39],"Thus,":[40],"attention":[41],"has":[42],"focused":[44],"on":[45],"leveraging":[46],"reference":[48,63,121,157,185],"to":[50,91,125,159],"control":[51,154,175],"synthesis,":[53],"which":[54,151],"is":[55,223],"also":[56],"regarded":[57],"as":[58,65],"manipulating":[59],"(or":[60],"editing)":[61],"per":[66],"prompt,":[69],"namely,":[70],"text-driven":[71,104],"image-to-image":[72,93],"translation.":[73],"This":[74],"paper":[75],"contributes":[76],"novel,":[78],"concise,":[79],"and":[80,102,142,180,192,202,219],"efficient":[81],"approach":[82,209],"that":[83,170],"adapts":[84],"pre-trained":[85],"large-scale":[86],"(T2I)":[88],"model":[90,108],"(I2I)":[94],"paradigm":[95],"plug-and-play":[98,166],"manner,":[99],"realizing":[100],"high-quality":[101],"versatile":[103],"I2I":[105,214],"translation":[106,215],"without":[107],"training,":[109],"fine-tuning,":[110],"or":[111],"online":[112],"optimization":[113],"process.":[114],"To":[115],"guide":[116],"T2I":[117,161],"image,":[122],"we":[123],"propose":[124],"decompose":[126],"diverse":[127],"guiding":[128,178,181],"factors":[129],"different":[131],"frequency":[132,147,197],"bands":[133],"features":[136],"DCT":[139],"spectral":[140],"space,":[141],"accordingly":[143],"devise":[144],"novel":[146],"band":[148],"substitution":[149],"layer":[150],"realizes":[152],"dynamic":[153],"result":[163],"manner.":[167],"We":[168],"demonstrate":[169],"our":[171,208],"method":[172],"allows":[173],"flexible":[174],"over":[176,210],"both":[177],"factor":[179],"intensity":[182],"simply":[187],"by":[188],"tuning":[189],"type":[191],"bandwidth":[193],"substituted":[196],"band,":[198],"respectively.":[199],"Extensive":[200],"qualitative":[201],"quantitative":[203],"experiments":[204],"verify":[205],"superiority":[206],"related":[211],"methods":[212],"visual":[216],"quality,":[217],"versatility,":[218],"controllability.":[220],"Our":[221],"project":[222],"publicly":[224],"available":[225],"at:":[226],"https://xianggao1102.github.io/FBSDiff_webpage/.":[227]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
