{"id":"https://openalex.org/W4390357625","doi":"https://doi.org/10.1109/tcsvt.2023.3347971","title":"Attention-Bridged Modal Interaction for Text-to-Image Generation","display_name":"Attention-Bridged Modal Interaction for Text-to-Image Generation","publication_year":2023,"publication_date":"2023-12-28","ids":{"openalex":"https://openalex.org/W4390357625","doi":"https://doi.org/10.1109/tcsvt.2023.3347971"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2023.3347971","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3347971","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072487632","display_name":"Hongchen Tan","orcid":"https://orcid.org/0000-0001-6915-8736"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongchen Tan","raw_affiliation_strings":["Beijing Institute of Artificial Intelligence, Beijing University of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Artificial Intelligence, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020527092","display_name":"Baocai Yin","orcid":"https://orcid.org/0000-0003-3121-1823"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baocai Yin","raw_affiliation_strings":["Beijing Institute of Artificial Intelligence, Beijing University of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Artificial Intelligence, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000168148","display_name":"Kaiqiang Xu","orcid":"https://orcid.org/0000-0001-6124-6328"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiqiang Xu","raw_affiliation_strings":["School of Mathematical Sciences, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Mathematical Sciences, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101612429","display_name":"Huasheng Wang","orcid":"https://orcid.org/0009-0003-9290-8445"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Huasheng Wang","raw_affiliation_strings":["School of Computer Science and Informatics, Cardiff University, Cardiff, U.K","School of Computer Science and Informatics, Cardiff University, Cardiff, Britain"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Informatics, Cardiff University, Cardiff, U.K","institution_ids":["https://openalex.org/I79510175"]},{"raw_affiliation_string":"School of Computer Science and Informatics, Cardiff University, Cardiff, Britain","institution_ids":["https://openalex.org/I79510175"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100645085","display_name":"Xiuping Liu","orcid":"https://orcid.org/0000-0003-1712-6083"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiuping Liu","raw_affiliation_strings":["School of Mathematical Sciences, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Mathematical Sciences, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100353783","display_name":"Xin Li","orcid":"https://orcid.org/0000-0002-0144-9489"},"institutions":[{"id":"https://openalex.org/I83740829","display_name":"School of Visual Arts","ror":"https://ror.org/0437v2m88","country_code":"US","type":"education","lineage":["https://openalex.org/I83740829"]},{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xin Li","raw_affiliation_strings":["Section of Visual Computing and Creative Media, School of Performance, Visualization, and Fine Arts, Texas A&#x0026;M University, College Station, TX, USA","Section of Visual Computing and Creative Media, School of Performance, Visualization, and Fine Arts, Texas A and M University, College Station, Texas, United States of America"],"affiliations":[{"raw_affiliation_string":"Section of Visual Computing and Creative Media, School of Performance, Visualization, and Fine Arts, Texas A&#x0026;M University, College Station, TX, USA","institution_ids":["https://openalex.org/I83740829"]},{"raw_affiliation_string":"Section of Visual Computing and Creative Media, School of Performance, Visualization, and Fine Arts, Texas A and M University, College Station, Texas, United States of America","institution_ids":["https://openalex.org/I91045830"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5072487632"],"corresponding_institution_ids":["https://openalex.org/I37796252"],"apc_list":null,"apc_paid":null,"fwci":1.4738,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.84953676,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"34","issue":"7","first_page":"5400","last_page":"5413"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7869848608970642},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.6658123731613159},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.6206038594245911},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5393365025520325},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5231860280036926},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5071494579315186},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4950622618198395},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.45623326301574707},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.36552369594573975},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32478833198547363}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7869848608970642},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.6658123731613159},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.6206038594245911},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5393365025520325},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5231860280036926},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5071494579315186},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4950622618198395},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45623326301574707},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.36552369594573975},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32478833198547363},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2023.3347971","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3347971","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.75,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G241052843","display_name":null,"funder_award_id":"BX20220025","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G4067581391","display_name":null,"funder_award_id":"2021M700303","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G4332995159","display_name":null,"funder_award_id":"CBET-2115405","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5870467190","display_name":null,"funder_award_id":"2022-ZZ-069","funder_id":"https://openalex.org/F4320323068","funder_display_name":"Beijing Postdoctoral Science Foundation"},{"id":"https://openalex.org/G6748698568","display_name":null,"funder_award_id":"62201020","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320323068","display_name":"Beijing Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":70,"referenced_works":["https://openalex.org/W1797268635","https://openalex.org/W1861492603","https://openalex.org/W2127589108","https://openalex.org/W2138621090","https://openalex.org/W2502225121","https://openalex.org/W2785678896","https://openalex.org/W2962845008","https://openalex.org/W2963163163","https://openalex.org/W2963184176","https://openalex.org/W2963373786","https://openalex.org/W2963413689","https://openalex.org/W2963612019","https://openalex.org/W2963966654","https://openalex.org/W2964024144","https://openalex.org/W2964216930","https://openalex.org/W2965289598","https://openalex.org/W2966792645","https://openalex.org/W2982450728","https://openalex.org/W2986510250","https://openalex.org/W2987919422","https://openalex.org/W3009811209","https://openalex.org/W3034633089","https://openalex.org/W3035500781","https://openalex.org/W3096601784","https://openalex.org/W3099614098","https://openalex.org/W3127393268","https://openalex.org/W3134582802","https://openalex.org/W3159521921","https://openalex.org/W3174194560","https://openalex.org/W3174525637","https://openalex.org/W3175528029","https://openalex.org/W3180355996","https://openalex.org/W3215495615","https://openalex.org/W4205219932","https://openalex.org/W4214485011","https://openalex.org/W4224035735","https://openalex.org/W4224281861","https://openalex.org/W4283388932","https://openalex.org/W4288089799","https://openalex.org/W4312388283","https://openalex.org/W4312438583","https://openalex.org/W4312933868","https://openalex.org/W4312977351","https://openalex.org/W4385245566","https://openalex.org/W4386065752","https://openalex.org/W6638319203","https://openalex.org/W6713645886","https://openalex.org/W6718379498","https://openalex.org/W6728889164","https://openalex.org/W6741383107","https://openalex.org/W6752910514","https://openalex.org/W6762931180","https://openalex.org/W6765779288","https://openalex.org/W6767384525","https://openalex.org/W6769627184","https://openalex.org/W6779823529","https://openalex.org/W6781951827","https://openalex.org/W6788990321","https://openalex.org/W6790978476","https://openalex.org/W6791276965","https://openalex.org/W6791353385","https://openalex.org/W6795288823","https://openalex.org/W6796242362","https://openalex.org/W6797359156","https://openalex.org/W6800989748","https://openalex.org/W6809885388","https://openalex.org/W6810125463","https://openalex.org/W6810940779","https://openalex.org/W6838639034","https://openalex.org/W6839643428"],"related_works":["https://openalex.org/W3110074278","https://openalex.org/W2953246223","https://openalex.org/W4293320219","https://openalex.org/W4283584549","https://openalex.org/W2618858825","https://openalex.org/W2554314924","https://openalex.org/W2998859928","https://openalex.org/W3151498616","https://openalex.org/W4381885966","https://openalex.org/W4308217387"],"abstract_inverted_index":{"We":[0],"propose":[1],"a":[2,38,48,78,132,138],"novel":[3,29,86,125],"Text-to-Image":[4],"Generation":[5],"Network,":[6],"Attention-bridged":[7,32],"Modal":[8,33],"Interaction":[9,34],"Generative":[10],"Adversarial":[11],"Network":[12],"(AMI-GAN),":[13],"to":[14,52,63,90,94,112],"better":[15,64,95],"explore":[16],"modal":[17],"interaction":[18],"and":[19,37,57,61,67,102,137],"perception":[20,81],"for":[21],"high-quality":[22],"image":[23],"synthesis.":[24],"The":[25,144],"AMI-GAN":[26],"contains":[27],"two":[28,130,167],"designs:":[30],"an":[31],"(AMI)":[35],"module":[36],"Residual":[39],"Perception":[40],"Discriminator":[41],"(RPD).":[42],"In":[43,74],"AMI,":[44],"we":[45,76,127],"mainly":[46],"design":[47,77,129],"multi-scale":[49,79],"attention":[50],"mechanism":[51,82],"exploit":[53],"semantics":[54,69],"alignment,":[55],"fusion,":[56],"enhancement":[58],"between":[59,99],"text":[60],"image,":[62],"refine":[65],"details":[66],"context":[68],"of":[70,117,151],"the":[71,92,100,106,110,114,118,156],"synthesized":[72,103,119],"image.":[73,104,120],"RPD,":[75],"information":[80,87],"with":[83,161],"our":[84,174],"proposed":[85],"adjustment":[88],"function,":[89],"encourage":[91],"discriminator":[93,107],"perceive":[96],"visual":[97,115],"differences":[98],"real":[101],"Consequently,":[105],"will":[108],"drive":[109],"generator":[111],"improve":[113],"quality":[116],"Besides,":[121],"based":[122],"on":[123,166],"these":[124],"designs,":[126],"can":[128,146,158],"versions,":[131],"single-stage":[133],"generation":[134,140],"framework":[135,141],"(AMI-GAN-S),":[136],"multi-stage":[139],"(AMI-GAN-M),":[142],"respectively.":[143],"former":[145],"synthesize":[147,159],"high-resolution":[148],"images":[149,160],"because":[150],"its":[152],"low":[153],"computational":[154],"cost;":[155],"latter":[157],"realistic":[162],"detail.":[163],"Experimental":[164],"results":[165],"widely":[168],"used":[169],"T2I":[170,180],"datasets":[171],"showed":[172],"that":[173],"AMI-GANs":[175],"achieve":[176],"competitive":[177],"performance":[178],"in":[179],"task.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
