{"id":"https://openalex.org/W4414360085","doi":"https://doi.org/10.24963/ijcai.2025/84","title":"Instructing Text-to-Image Diffusion Models via Classifier-Guided Semantic Optimization","display_name":"Instructing Text-to-Image Diffusion Models via Classifier-Guided Semantic Optimization","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414360085","doi":"https://doi.org/10.24963/ijcai.2025/84"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/84","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/84","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055511363","display_name":"Yuanyuan Chang","orcid":"https://orcid.org/0000-0001-5100-830X"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuanyuan Chang","raw_affiliation_strings":["MOE Key Laboratory for Intelligent Networks and Network Security, Xi\u2019an Jiaotong University"],"affiliations":[{"raw_affiliation_string":"MOE Key Laboratory for Intelligent Networks and Network Security, Xi\u2019an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004970804","display_name":"Yinghua Yao","orcid":"https://orcid.org/0000-0003-3204-0739"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yinghua Yao","raw_affiliation_strings":["Center for Frontier AI Research, Agency for Science, Technology and Research, Singapore","Institute of High Performance Computing, Agency for Science, Technology and Research, Singapore"],"affiliations":[{"raw_affiliation_string":"Center for Frontier AI Research, Agency for Science, Technology and Research, Singapore","institution_ids":["https://openalex.org/I115228651"]},{"raw_affiliation_string":"Institute of High Performance Computing, Agency for Science, Technology and Research, Singapore","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101760233","display_name":"Tao Qin","orcid":"https://orcid.org/0000-0002-7674-698X"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Qin","raw_affiliation_strings":["MOE Key Laboratory for Intelligent Networks and Network Security, Xi\u2019an Jiaotong University"],"affiliations":[{"raw_affiliation_string":"MOE Key Laboratory for Intelligent Networks and Network Security, Xi\u2019an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100429662","display_name":"Mengmeng Wang","orcid":"https://orcid.org/0000-0002-2850-6092"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]},{"id":"https://openalex.org/I17442442","display_name":"State Grid Corporation of China (China)","ror":"https://ror.org/05twwhs70","country_code":"CN","type":"company","lineage":["https://openalex.org/I17442442"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengmeng Wang","raw_affiliation_strings":["SGIT AI Lab, State Grid Corporation of China","Zhejiang University of Technology"],"affiliations":[{"raw_affiliation_string":"SGIT AI Lab, State Grid Corporation of China","institution_ids":["https://openalex.org/I17442442"]},{"raw_affiliation_string":"Zhejiang University of Technology","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021751767","display_name":"Ivor W. Tsang","orcid":"https://orcid.org/0000-0001-8095-4637"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ivor Tsang","raw_affiliation_strings":["Center for Frontier AI Research, Agency for Science, Technology and Research, Singapore","Institute of High Performance Computing, Agency for Science, Technology and Research, Singapore"],"affiliations":[{"raw_affiliation_string":"Center for Frontier AI Research, Agency for Science, Technology and Research, Singapore","institution_ids":["https://openalex.org/I115228651"]},{"raw_affiliation_string":"Institute of High Performance Computing, Agency for Science, Technology and Research, Singapore","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102152323","display_name":"Guang Dai","orcid":"https://orcid.org/0000-0002-3529-9087"},"institutions":[{"id":"https://openalex.org/I17442442","display_name":"State Grid Corporation of China (China)","ror":"https://ror.org/05twwhs70","country_code":"CN","type":"company","lineage":["https://openalex.org/I17442442"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guang Dai","raw_affiliation_strings":["SGIT AI Lab, State Grid Corporation of China"],"affiliations":[{"raw_affiliation_string":"SGIT AI Lab, State Grid Corporation of China","institution_ids":["https://openalex.org/I17442442"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5055511363"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26738084,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"747","last_page":"755"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.2045000046491623,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.2045000046491623,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6432999968528748},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5974000096321106},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5113000273704529},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.4952000081539154},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.40139999985694885},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3995000123977661}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7865999937057495},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6432999968528748},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5974000096321106},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5874999761581421},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5113000273704529},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.4952000081539154},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.40139999985694885},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4007999897003174},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3995000123977661},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.29159998893737793},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.2540999948978424},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/84","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/84","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Text-to-image":[0],"diffusion":[1,80],"models":[2,63],"have":[3],"emerged":[4],"as":[5,21,100],"powerful":[6],"tools":[7],"for":[8,32],"high-quality":[9],"image":[10],"generation":[11],"and":[12,43,109,123],"editing.":[13],"Many":[14],"existing":[15],"approaches":[16],"rely":[17],"on":[18,69],"text":[19,70],"prompts":[20,71],"editing":[22,46],"guidance.":[23],"However,":[24],"these":[25],"methods":[26],"are":[27,97],"constrained":[28],"by":[29,57],"the":[30,79,91,101],"need":[31],"manual":[33],"prompt":[34],"crafting,":[35],"which":[36],"can":[37],"be":[38],"time-consuming,":[39],"introduce":[40],"irrelevant":[41],"details,":[42],"significantly":[44],"limit":[45],"performance.":[47],"In":[48],"this":[49],"work,":[50],"we":[51],"propose":[52],"optimizing":[53],"semantic":[54,88],"embeddings":[55,89,96],"guided":[56],"attribute":[58,105],"classifiers":[59,84],"to":[60,85],"steer":[61],"text-to-image":[62],"toward":[64],"desired":[65],"edits,":[66],"without":[67],"relying":[68],"or":[72,76],"requiring":[73],"any":[74],"training":[75],"fine-tuning":[77],"of":[78,104,121,129],"model.":[81],"We":[82],"utilize":[83],"learn":[86],"precise":[87],"at":[90,134],"dataset":[92],"level.":[93],"The":[94],"learned":[95],"theoretically":[98],"justified":[99],"optimal":[102],"representation":[103],"semantics,":[106],"enabling":[107],"disentangled":[108],"accurate":[110],"edits.":[111],"Experiments":[112],"further":[113],"demonstrate":[114],"that":[115],"our":[116],"method":[117],"achieves":[118],"high":[119],"levels":[120],"disentanglement":[122],"strong":[124],"generalization":[125],"across":[126],"different":[127],"domains":[128],"data.":[130],"Code":[131],"is":[132],"available":[133],"https://github.com/Chang-yuanyuan/CASO.":[135]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
