{"id":"https://openalex.org/W4415821320","doi":"https://doi.org/10.1109/tpami.2025.3628109","title":"Refine, Control and Distill: A Text-to-Image Framework for Faithful Image Generation","display_name":"Refine, Control and Distill: A Text-to-Image Framework for Faithful Image Generation","publication_year":2025,"publication_date":"2025-11-03","ids":{"openalex":"https://openalex.org/W4415821320","doi":"https://doi.org/10.1109/tpami.2025.3628109","pmid":"https://pubmed.ncbi.nlm.nih.gov/41182940"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3628109","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3628109","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100669393","display_name":"Peng Xing","orcid":"https://orcid.org/0000-0002-2307-5393"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Peng Xing","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","School of Computer Science and Engineering, Nanjing University of Science and Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100387149","display_name":"Ning Wang","orcid":"https://orcid.org/0000-0002-4937-6784"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ning Wang","raw_affiliation_strings":["Huawei Technologies, Shenzhen, China","Huawei Technologies, China"],"affiliations":[{"raw_affiliation_string":"Huawei Technologies, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]},{"raw_affiliation_string":"Huawei Technologies, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101812029","display_name":"Yanpeng Sun","orcid":"https://orcid.org/0000-0001-6249-5596"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanpeng Sun","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","School of Computer Science and Engineering, Nanjing University of Science and Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035112538","display_name":"Jinhui Tang","orcid":"https://orcid.org/0000-0001-9008-222X"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinhui Tang","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","School of Computer Science and Engineering, Nanjing University of Science and Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017096005","display_name":"Zechao Li","orcid":"https://orcid.org/0000-0002-5341-5985"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zechao Li","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","School of Computer Science and Engineering, Nanjing University of Science and Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, China","institution_ids":["https://openalex.org/I36399199"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100669393"],"corresponding_institution_ids":["https://openalex.org/I36399199"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31895878,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"48","issue":"3","first_page":"2296","last_page":"2311"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8817999958992004,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8817999958992004,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.016899999231100082,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.014299999922513962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6047999858856201},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5935999751091003},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5928000211715698},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5394999980926514},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.48750001192092896},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4771000146865845},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.396699994802475},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.37880000472068787}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.777400016784668},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6202999949455261},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6047999858856201},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5935999751091003},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5928000211715698},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5394999980926514},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.48750001192092896},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4771000146865845},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.396699994802475},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.37880000472068787},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.36169999837875366},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3433000147342682},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3179999887943268},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3163999915122986},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.31610000133514404},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31470000743865967},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28450000286102295},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2709999978542328},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.26899999380111694},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.2587999999523163}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3628109","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3628109","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:41182940","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41182940","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2531279109","display_name":null,"funder_award_id":"62425603","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6750246608","display_name":null,"funder_award_id":"BK20240011","funder_id":"https://openalex.org/F4320334982","funder_display_name":"Basic Research Program of Jiangsu Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334982","display_name":"Basic Research Program of Jiangsu Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2963966654","https://openalex.org/W2964024144","https://openalex.org/W2982450728","https://openalex.org/W3191805365","https://openalex.org/W3216352822","https://openalex.org/W4286611269","https://openalex.org/W4312376880","https://openalex.org/W4312824283","https://openalex.org/W4312911498","https://openalex.org/W4312933868","https://openalex.org/W4360884927","https://openalex.org/W4382462760","https://openalex.org/W4385270985","https://openalex.org/W4385537492","https://openalex.org/W4385569875","https://openalex.org/W4385570204","https://openalex.org/W4386075631","https://openalex.org/W4386076027","https://openalex.org/W4386076215","https://openalex.org/W4386076425","https://openalex.org/W4386076458","https://openalex.org/W4386076532","https://openalex.org/W4386113271","https://openalex.org/W4390871953","https://openalex.org/W4390872387","https://openalex.org/W4390872671","https://openalex.org/W4390872982","https://openalex.org/W4390873054","https://openalex.org/W4390873211","https://openalex.org/W4390873875","https://openalex.org/W4390874580","https://openalex.org/W4392172801","https://openalex.org/W4402727040","https://openalex.org/W4402733577","https://openalex.org/W4402754201","https://openalex.org/W4405003032","https://openalex.org/W4415796806"],"related_works":[],"abstract_inverted_index":{"While":[0],"text-to-image":[1,55],"diffusion":[2,110,176],"models":[3],"exhibit":[4],"outstanding":[5],"results,":[6],"they":[7,42],"struggle":[8],"to":[9,33,112],"faithfully":[10],"generate":[11,45],"key":[12],"subjects":[13,72],"with":[14],"corresponding":[15],"attributes":[16],"in":[17,73,147,156],"prompts,":[18],"challenges":[19],"known":[20],"as":[21],"catastrophic":[22],"neglect":[23],"and":[24,57,78,83,102,141,159,162,169],"attribute":[25],"binding.":[26],"Previous":[27],"works":[28],"typically":[29],"utilize":[30],"attention":[31,138],"adjustments":[32],"solve":[34],"the":[35,54,94,108,114,119,127,148],"above":[36,128],"problems,":[37],"whereas":[38],"we":[39,51,97,125],"observe":[40],"that":[41,62],"may":[43],"still":[44],"unfaithful":[46],"images.":[47],"In":[48],"this":[49],"paper,":[50],"carefully":[52],"analyze":[53],"process":[56],"pinpoint":[58],"three":[59,136],"pivotal":[60],"bottlenecks":[61,120],"hinder":[63],"image":[64],"faithful":[65,158],"generation:":[66],"(1)":[67],"unequal":[68],"responses":[69],"of":[70,87,143],"neglected":[71],"text":[74,132],"embedding,":[75],"(2)":[76],"competition":[77],"entanglement":[79],"between":[80],"subjects'":[81],"attention,":[82],"(3)":[84],"suboptimal":[85],"quality":[86],"intermediate":[88,144],"features":[89,146],"from":[90],"U-Net.":[91],"Based":[92],"on":[93,172],"aforementioned":[95],"observations,":[96],"propose":[98],"a":[99,131],"Refine,":[100],"Control,":[101],"Distill":[103],"(RCD)":[104],"framework":[105],"built":[106],"upon":[107],"stable":[109],"model":[111],"alleviate":[113],"negative":[115],"effects":[116],"raised":[117],"by":[118],"mentioned":[121],"above,":[122],"respectively.":[123],"Specifically,":[124],"achieve":[126],"goals":[129],"through":[130,166],"embedding":[133],"refinement":[134],"module,":[135],"region-level":[137],"control":[139],"losses,":[140],"self-distillation":[142],"semantic":[145],"denoising":[149],"process.":[150],"Our":[151],"approach":[152],"exhibits":[153],"promising":[154],"capability":[155],"generating":[157],"high-quality":[160],"images":[161],"outperforms":[163],"state-of-the-art":[164],"methods":[165],"extensive":[167],"quantitative":[168],"qualitative":[170],"evaluations":[171],"recent":[173],"advanced":[174],"base":[175],"models.":[177]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-03T00:00:00"}
