{"id":"https://openalex.org/W4406857644","doi":"https://doi.org/10.1109/tmm.2025.3535304","title":"CLIP-GAN: Stacking CLIPs and GAN for Efficient and Controllable Text-to-Image Synthesis","display_name":"CLIP-GAN: Stacking CLIPs and GAN for Efficient and Controllable Text-to-Image Synthesis","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4406857644","doi":"https://doi.org/10.1109/tmm.2025.3535304"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2025.3535304","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3535304","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058213096","display_name":"Yingli Hou","orcid":"https://orcid.org/0009-0002-0283-7640"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yingli Hou","raw_affiliation_strings":["Software College, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Software College, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wei Zhang","orcid":"https://orcid.org/0009-0000-7993-2430"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Zhang","raw_affiliation_strings":["Software College, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Software College, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhiliang Zhu","orcid":"https://orcid.org/0000-0003-3172-7600"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiliang Zhu","raw_affiliation_strings":["Software College, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Software College, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080895253","display_name":"Hai Yu","orcid":"https://orcid.org/0000-0002-8024-1781"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai Yu","raw_affiliation_strings":["Software College, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Software College, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5058213096"],"corresponding_institution_ids":["https://openalex.org/I9224756"],"apc_list":null,"apc_paid":null,"fwci":9.7446,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.97950458,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"27","issue":null,"first_page":"3702","last_page":"3715"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9768000245094299,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stacking","display_name":"Stacking","score":0.7869352698326111},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7478016018867493},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.482964426279068},{"id":"https://openalex.org/keywords/clips","display_name":"CLIPS","score":0.45782288908958435},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.38256239891052246},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37777984142303467},{"id":"https://openalex.org/keywords/optoelectronics","display_name":"Optoelectronics","score":0.3470079302787781},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.267656534910202},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07672616839408875}],"concepts":[{"id":"https://openalex.org/C33347731","wikidata":"https://www.wikidata.org/wiki/Q285210","display_name":"Stacking","level":2,"score":0.7869352698326111},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7478016018867493},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.482964426279068},{"id":"https://openalex.org/C2778739407","wikidata":"https://www.wikidata.org/wiki/Q165372","display_name":"CLIPS","level":2,"score":0.45782288908958435},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38256239891052246},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37777984142303467},{"id":"https://openalex.org/C49040817","wikidata":"https://www.wikidata.org/wiki/Q193091","display_name":"Optoelectronics","level":1,"score":0.3470079302787781},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.267656534910202},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07672616839408875},{"id":"https://openalex.org/C46141821","wikidata":"https://www.wikidata.org/wiki/Q209402","display_name":"Nuclear magnetic resonance","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3535304","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3535304","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G391480936","display_name":null,"funder_award_id":"61402092","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4772004818","display_name":null,"funder_award_id":"61603182","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5905200325","display_name":null,"funder_award_id":"61374178","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6052427743","display_name":null,"funder_award_id":"2022-MS-123","funder_id":"https://openalex.org/F4320323086","funder_display_name":"Natural Science Foundation of Liaoning Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323086","display_name":"Natural Science Foundation of Liaoning Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1933349210","https://openalex.org/W2603777577","https://openalex.org/W2727849499","https://openalex.org/W2896457183","https://openalex.org/W2904565150","https://openalex.org/W2963115613","https://openalex.org/W2963518342","https://openalex.org/W2963966654","https://openalex.org/W2965289598","https://openalex.org/W2966792645","https://openalex.org/W3006538026","https://openalex.org/W3035574324","https://openalex.org/W3094502228","https://openalex.org/W3133431537","https://openalex.org/W3174194560","https://openalex.org/W3174525637","https://openalex.org/W3174807077","https://openalex.org/W4214485011","https://openalex.org/W4285604868","https://openalex.org/W4312388283","https://openalex.org/W4312438583","https://openalex.org/W4312651322","https://openalex.org/W4312805760","https://openalex.org/W4312911498","https://openalex.org/W4312933868","https://openalex.org/W4312977351","https://openalex.org/W4313186669","https://openalex.org/W4320008790","https://openalex.org/W4385245566","https://openalex.org/W4386065752","https://openalex.org/W4386071621","https://openalex.org/W4386076458","https://openalex.org/W4386790226","https://openalex.org/W4390874002","https://openalex.org/W4390874089","https://openalex.org/W4390874566","https://openalex.org/W4402727293","https://openalex.org/W6631190155","https://openalex.org/W6638319203","https://openalex.org/W6752378368","https://openalex.org/W6765779288","https://openalex.org/W6779879114","https://openalex.org/W6790019176","https://openalex.org/W6790978476","https://openalex.org/W6791353385","https://openalex.org/W6796242362","https://openalex.org/W6809396591","https://openalex.org/W6809885388","https://openalex.org/W6810125463","https://openalex.org/W6810940779","https://openalex.org/W6838639034","https://openalex.org/W6839643428","https://openalex.org/W6846655393","https://openalex.org/W6849119191","https://openalex.org/W6849367332","https://openalex.org/W6852162230"],"related_works":["https://openalex.org/W2417253731","https://openalex.org/W2350469024","https://openalex.org/W2491583298","https://openalex.org/W2036154621","https://openalex.org/W2327827625","https://openalex.org/W2395860100","https://openalex.org/W795077857","https://openalex.org/W2376416463","https://openalex.org/W2007338512","https://openalex.org/W4388801239"],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,15,52,106,190,201],"text-to-image":[3],"synthesis":[4],"have":[5],"captivated":[6],"audiences":[7],"worldwide,":[8],"drawing":[9],"considerable":[10],"attention.":[11],"Although":[12],"significant":[13],"progress":[14],"generating":[16],"photo-realistic":[17],"images":[18],"through":[19,103],"large":[20,179],"pre-trained":[21,180],"autoregressive":[22,181],"and":[23,39,49,85,143,155,164,182,223],"diffusion":[24,183],"models,":[25,184],"these":[26,67],"models":[27],"face":[28],"three":[29],"critical":[30],"constraints:":[31],"(1)":[32],"The":[33],"requirement":[34],"for":[35,123,226],"extensive":[36],"training":[37,167],"data":[38],"numerous":[40],"model":[41,80,171],"parameters;":[42],"(2)":[43],"Inefficient,":[44],"multi-step":[45],"image":[46,126,156,202,220],"generation":[47,203,221],"process;":[48],"(3)":[50],"Difficulties":[51],"controlling":[53],"the":[54,71,77,83,88,138,144,162,197,205,212,216,224,231],"output":[55],"visual":[56,98,133],"features,":[57],"requiring":[58],"complexly":[59],"designed":[60],"prompts":[61,105],"to":[62,172,178,195],"ensure":[63],"text-image":[64],"alignment.":[65],"Addressing":[66],"challenges,":[68],"we":[69],"introduce":[70],"CLIP-GAN":[72,193],"model,":[73,217,233],"which":[74],"innovatively":[75],"integrates":[76],"pretrained":[78],"CLIP":[79,102,136],"into":[81],"both":[82],"generator":[84,95,131,163],"discriminator":[86,165],"of":[87,153,215],"GAN.":[89],"Our":[90],"architecture":[91],"includes":[92],"a":[93,107,114,187],"CLIP-based":[94,115],"that":[96],"employs":[97],"concepts":[99,134],"derived":[100],"from":[101,135],"text":[104,154],"feature":[108],"adapter":[109],"module.":[110],"We":[111],"also":[112],"propose":[113],"discriminator,":[116],"utilizing":[117],"CLIP's":[118],"advanced":[119],"scene":[120],"understanding":[121],"capabilities":[122],"more":[124],"precise":[125],"quality":[127],"evaluation.":[128],"Additionally,":[129],"our":[130,170],"applies":[132],"via":[137],"Text-based":[139],"Generator":[140],"Block":[141],"(TG-Block)":[142],"Polarized":[145],"Feature":[146],"Fusion":[147],"Module":[148],"(PFFM)":[149],"enabling":[150,169],"better":[151],"fusion":[152],"semantic":[157],"information.":[158],"This":[159],"integration":[160],"within":[161,230],"enhances":[166],"efficiency,":[168],"achieve":[173,196],"evaluation":[174],"results":[175],"not":[176],"inferior":[177],"but":[185],"with":[186],"94%":[188],"reduction":[189],"learnable":[191],"parameters.":[192],"aims":[194],"best":[198],"efficiency-accuracy":[199],"trade-off":[200],"given":[204],"limited":[206],"resource":[207],"budget.":[208],"Extensive":[209],"evaluations":[210],"validate":[211],"superior":[213],"performance":[214],"demonstrating":[218],"faster":[219],"speed":[222],"potential":[225],"greater":[227],"stylistic":[228],"diversity":[229],"GAN":[232],"while":[234],"still":[235],"preserving":[236],"its":[237],"smooth":[238],"latent":[239],"space.":[240]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
