{"id":"https://openalex.org/W4297630623","doi":"https://doi.org/10.1145/3503161.3547881","title":"DSE-GAN: Dynamic Semantic Evolution Generative Adversarial Network for Text-to-Image Generation","display_name":"DSE-GAN: Dynamic Semantic Evolution Generative Adversarial Network for Text-to-Image Generation","publication_year":2022,"publication_date":"2022-10-10","ids":{"openalex":"https://openalex.org/W4297630623","doi":"https://doi.org/10.1145/3503161.3547881"},"language":"en","primary_location":{"id":"doi:10.1145/3503161.3547881","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3547881","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2209.01339","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101535869","display_name":"Mengqi Huang","orcid":"https://orcid.org/0000-0002-6976-9964"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mengqi Huang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023341829","display_name":"Zhendong Mao","orcid":"https://orcid.org/0000-0001-5739-8126"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhendong Mao","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100626336","display_name":"Penghui Wang","orcid":"https://orcid.org/0000-0002-1044-5009"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Penghui Wang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418248","display_name":"Quan Wang","orcid":"https://orcid.org/0000-0001-6943-4569"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Wang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046305086","display_name":"Yongdong Zhang","orcid":"https://orcid.org/0000-0002-1151-1792"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["University of Science and Technology of China, Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101535869"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":1.2576,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.86444287,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4345","last_page":"4354"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8015098571777344},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.7584121227264404},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.6244004368782043},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.5819635391235352},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5646323561668396},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5502581000328064},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5301163196563721},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5024762153625488},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.4721631705760956},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.4688856303691864},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4568929672241211},{"id":"https://openalex.org/keywords/generative-adversarial-network","display_name":"Generative adversarial network","score":0.4522264301776886},{"id":"https://openalex.org/keywords/stage","display_name":"Stage (stratigraphy)","score":0.4490472078323364},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4345126152038574},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3326012194156647},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09462174773216248}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8015098571777344},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.7584121227264404},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.6244004368782043},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.5819635391235352},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5646323561668396},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5502581000328064},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5301163196563721},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5024762153625488},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.4721631705760956},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.4688856303691864},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4568929672241211},{"id":"https://openalex.org/C2988773926","wikidata":"https://www.wikidata.org/wiki/Q25104379","display_name":"Generative adversarial network","level":3,"score":0.4522264301776886},{"id":"https://openalex.org/C146357865","wikidata":"https://www.wikidata.org/wiki/Q1123245","display_name":"Stage (stratigraphy)","level":2,"score":0.4490472078323364},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4345126152038574},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3326012194156647},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09462174773216248},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3503161.3547881","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3547881","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2209.01339","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.01339","pdf_url":"https://arxiv.org/pdf/2209.01339","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2209.01339","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.01339","pdf_url":"https://arxiv.org/pdf/2209.01339","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6800000071525574,"display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G1565499372","display_name":null,"funder_award_id":"62121002","funder_id":"https://openalex.org/F4320322271","funder_display_name":"Science Fund for Creative Research Groups"},{"id":"https://openalex.org/G5019403743","display_name":null,"funder_award_id":"WK3480000008, WK3480000010","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G5442763775","display_name":null,"funder_award_id":"U19A2057, 61876223","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322271","display_name":"Science Fund for Creative Research Groups","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2183341477","https://openalex.org/W2884751099","https://openalex.org/W2962845008","https://openalex.org/W2962851801","https://openalex.org/W2963163163","https://openalex.org/W2963184176","https://openalex.org/W2963258075","https://openalex.org/W2963612019","https://openalex.org/W2963783181","https://openalex.org/W2963966654","https://openalex.org/W2964024144","https://openalex.org/W2965289598","https://openalex.org/W2966792645","https://openalex.org/W2970562079","https://openalex.org/W2982450728","https://openalex.org/W3029699545","https://openalex.org/W3035071066","https://openalex.org/W3035424951","https://openalex.org/W3035500781","https://openalex.org/W3091653824","https://openalex.org/W3092820619","https://openalex.org/W3127393268","https://openalex.org/W3174525637","https://openalex.org/W3204647170","https://openalex.org/W3205866742","https://openalex.org/W4214485011"],"related_works":["https://openalex.org/W2995777218","https://openalex.org/W3217069185","https://openalex.org/W4308928038","https://openalex.org/W3049340819","https://openalex.org/W4200430540","https://openalex.org/W3141413246","https://openalex.org/W2888032422","https://openalex.org/W4322709305","https://openalex.org/W2808862658","https://openalex.org/W4391899165"],"abstract_inverted_index":{"Text-to-image":[0],"generation":[1,38,83],"aims":[2],"at":[3,51,137],"generating":[4],"realistic":[5],"images":[6],"which":[7,117,160],"are":[8],"semantically":[9],"consistent":[10],"with":[11],"the":[12,19,32,60,63,81,125,162,183],"given":[13],"text.":[14],"Previous":[15],"works":[16],"mainly":[17],"adopt":[18],"multi-stage":[20,106],"architecture":[21],"by":[22,145,165],"stacking":[23],"generator-discriminator":[24],"pairs":[25],"to":[26,36,73,95,123,134],"engage":[27],"multiple":[28,168],"adversarial":[29,105,169],"training,":[30],"where":[31],"text":[33,49,69,99],"semantics":[34],"used":[35,204],"provide":[37,74],"guidance":[39,79],"remain":[40],"static":[41],"across":[42],"all":[43],"stages.":[44],"This":[45],"work":[46],"argues":[47],"that":[48,192],"features":[50,100,122],"each":[52,97,138],"stage":[53,65,139],"should":[54],"be":[55,135],"adaptively":[56],"re-composed":[57,136,143],"conditioned":[58],"on":[59,201],"status":[61],"of":[62,177],"historical":[64,67,120],"(\\emphi.e.,":[66],"stage's":[68,98],"and":[70,76,128,172,180,190,196,208],"image":[71,121],"features)":[72],"diversified":[75],"accurate":[77],"semantic":[78],"during":[80],"coarse-to-fine":[82],"process.":[84],"We":[85,186],"thereby":[86],"propose":[87],"a":[88,102],"novel":[89,103],"Dynamical":[90],"Semantic":[91,113],"Evolution":[92,114],"GAN":[93],"(DSE-GAN)":[94],"re-compose":[96],"under":[101],"single":[104],"architecture.":[107],"Specifically,":[108],"we":[109],"design":[110],"(1)":[111],"Dynamic":[112],"(DSE)":[115],"module,":[116],"first":[118],"aggregates":[119],"summarize":[124],"generative":[126],"feedback,":[127],"then":[129],"dynamically":[130,146],"selects":[131],"words":[132],"required":[133],"as":[140,142],"well":[141],"them":[144],"enhancing":[147],"or":[148],"suppressing":[149],"different":[150],"granularity":[151],"subspace's":[152],"semantics.":[153],"(2)":[154],"Single":[155],"Adversarial":[156],"Multi-stage":[157],"Architecture":[158],"(SAMA),":[159],"extends":[161],"previous":[163],"structure":[164],"eliminating":[166],"complicated":[167],"training":[170],"requirements":[171],"therefore":[173],"allows":[174],"more":[175],"stages":[176],"text-image":[178],"interactions,":[179],"finally":[181],"facilitates":[182],"DSE":[184],"module.":[185],"conduct":[187],"comprehensive":[188],"experiments":[189],"show":[191],"DSE-GAN":[193],"achieves":[194],"7.48%":[195],"37.8%":[197],"relative":[198],"FID":[199],"improvement":[200],"two":[202],"widely":[203],"benchmarks,":[205],"i.e.,":[206],"CUB-200":[207],"MSCOCO,":[209],"respectively.":[210]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
