{"id":"https://openalex.org/W4387409379","doi":"https://doi.org/10.1007/s00521-023-09021-x","title":"Swinv2-Imagen: hierarchical vision transformer diffusion models for text-to-image generation","display_name":"Swinv2-Imagen: hierarchical vision transformer diffusion models for text-to-image generation","publication_year":2023,"publication_date":"2023-10-06","ids":{"openalex":"https://openalex.org/W4387409379","doi":"https://doi.org/10.1007/s00521-023-09021-x"},"language":"en","primary_location":{"id":"doi:10.1007/s00521-023-09021-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-023-09021-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-023-09021-x.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s00521-023-09021-x.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100750027","display_name":"Ruijun Li","orcid":"https://orcid.org/0000-0002-3743-6567"},"institutions":[{"id":"https://openalex.org/I39854758","display_name":"Auckland University of Technology","ror":"https://ror.org/01zvqw119","country_code":"NZ","type":"education","lineage":["https://openalex.org/I39854758"]}],"countries":["NZ"],"is_corresponding":true,"raw_author_name":"Ruijun Li","raw_affiliation_strings":["Auckland University of Technology, Auckland, 1010, New Zealand"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Auckland University of Technology, Auckland, 1010, New Zealand","institution_ids":["https://openalex.org/I39854758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044008966","display_name":"Weihua Li","orcid":"https://orcid.org/0000-0001-9215-4979"},"institutions":[{"id":"https://openalex.org/I39854758","display_name":"Auckland University of Technology","ror":"https://ror.org/01zvqw119","country_code":"NZ","type":"education","lineage":["https://openalex.org/I39854758"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Weihua Li","raw_affiliation_strings":["Auckland University of Technology, Auckland, 1010, New Zealand"],"raw_orcid":"https://orcid.org/0000-0001-9215-4979","affiliations":[{"raw_affiliation_string":"Auckland University of Technology, Auckland, 1010, New Zealand","institution_ids":["https://openalex.org/I39854758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039924074","display_name":"Yi Yang","orcid":"https://orcid.org/0000-0002-8917-2196"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Yang","raw_affiliation_strings":["Hefei University of Technology, Hefei, 230601, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hefei University of Technology, Hefei, 230601, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hanyu Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I129801699","display_name":"University of Tasmania","ror":"https://ror.org/01nfmeh72","country_code":"AU","type":"education","lineage":["https://openalex.org/I129801699"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Hanyu Wei","raw_affiliation_strings":["University of Tasmania, Hobart, 7005, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Tasmania, Hobart, 7005, Australia","institution_ids":["https://openalex.org/I129801699"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028333907","display_name":"Jianhua Jiang","orcid":"https://orcid.org/0000-0002-9149-2922"},"institutions":[{"id":"https://openalex.org/I179324530","display_name":"Jilin University of Finance and Economics","ror":"https://ror.org/04az9eh24","country_code":"CN","type":"education","lineage":["https://openalex.org/I179324530"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianhua Jiang","raw_affiliation_strings":["Jilin University of Finance and Economics, Changchun, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jilin University of Finance and Economics, Changchun, China","institution_ids":["https://openalex.org/I179324530"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029548157","display_name":"Quan Bai","orcid":"https://orcid.org/0000-0003-1214-6317"},"institutions":[{"id":"https://openalex.org/I129801699","display_name":"University of Tasmania","ror":"https://ror.org/01nfmeh72","country_code":"AU","type":"education","lineage":["https://openalex.org/I129801699"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Quan Bai","raw_affiliation_strings":["University of Tasmania, Hobart, 7005, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Tasmania, Hobart, 7005, Australia","institution_ids":["https://openalex.org/I129801699"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100750027"],"corresponding_institution_ids":["https://openalex.org/I39854758"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":2.6494,"has_fulltext":true,"cited_by_count":23,"citation_normalized_percentile":{"value":0.91934559,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"36","issue":"28","first_page":"17245","last_page":"17260"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8308401107788086},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6237159967422485},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.545012354850769},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5117430686950684},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.49244388937950134},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.45011553168296814},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.413467675447464},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39316773414611816},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.20963630080223083},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1326349675655365}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8308401107788086},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6237159967422485},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.545012354850769},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5117430686950684},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.49244388937950134},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.45011553168296814},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.413467675447464},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39316773414611816},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.20963630080223083},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1326349675655365},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s00521-023-09021-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-023-09021-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-023-09021-x.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s00521-023-09021-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-023-09021-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-023-09021-x.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5099999904632568}],"awards":[],"funders":[{"id":"https://openalex.org/F4320310339","display_name":"Auckland University of Technology, New Zealand","ror":"https://ror.org/01zvqw119"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387409379.pdf"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2054829889","https://openalex.org/W2077069816","https://openalex.org/W2157331557","https://openalex.org/W2250378130","https://openalex.org/W2594833348","https://openalex.org/W2774320778","https://openalex.org/W2807697862","https://openalex.org/W2884367402","https://openalex.org/W2884436604","https://openalex.org/W2905338897","https://openalex.org/W2928133111","https://openalex.org/W2962756421","https://openalex.org/W2963101956","https://openalex.org/W2963163163","https://openalex.org/W2963184176","https://openalex.org/W2965289598","https://openalex.org/W2965833116","https://openalex.org/W2966792645","https://openalex.org/W2971865858","https://openalex.org/W2992478697","https://openalex.org/W2996290406","https://openalex.org/W3004349648","https://openalex.org/W3006538026","https://openalex.org/W3015788359","https://openalex.org/W3030515889","https://openalex.org/W3034667500","https://openalex.org/W3035500781","https://openalex.org/W3035665735","https://openalex.org/W3035750252","https://openalex.org/W3080642835","https://openalex.org/W3087257704","https://openalex.org/W3102554291","https://openalex.org/W3107848485","https://openalex.org/W3143894246","https://openalex.org/W3174194560","https://openalex.org/W3174525637","https://openalex.org/W3209047863","https://openalex.org/W3212516020","https://openalex.org/W4200498145","https://openalex.org/W4225495512","https://openalex.org/W4226278310","https://openalex.org/W4290878206","https://openalex.org/W4312282373","https://openalex.org/W4312349930","https://openalex.org/W4312388283","https://openalex.org/W4312438583","https://openalex.org/W4312561757","https://openalex.org/W4312911498","https://openalex.org/W4312933868","https://openalex.org/W4312977351","https://openalex.org/W6600234944","https://openalex.org/W6601055912","https://openalex.org/W6777078701"],"related_works":["https://openalex.org/W3147584709","https://openalex.org/W2182785089","https://openalex.org/W2977677679","https://openalex.org/W4312178642","https://openalex.org/W1992327129","https://openalex.org/W4387838477","https://openalex.org/W2067193074","https://openalex.org/W2381986121","https://openalex.org/W3107426390","https://openalex.org/W3088721469"],"abstract_inverted_index":{"Abstract":[0],"Recently,":[1],"diffusion":[2,91,124],"models":[3],"have":[4],"been":[5],"proven":[6],"to":[7,161],"perform":[8],"remarkably":[9],"well":[10],"in":[11,15,77,122],"text-to-image":[12,42,90],"synthesis":[13],"tasks":[14],"a":[16,48,88,95,100,104,140],"number":[17],"of":[18,62,114,130,135,165],"studies,":[19],"immediately":[20],"presenting":[21],"new":[22],"study":[23],"opportunities":[24],"for":[25,41,52],"image":[26,78],"generation.":[27,43],"Google\u2019s":[28],"Imagen":[29,45,71],"follows":[30],"this":[31],"research":[32],"trend":[33],"and":[34,99,116,120,177],"outperforms":[35,188],"DALLE2":[36],"as":[37],"the":[38,59,63,66,74,86,108,111,123,128,149,153,163,166,184],"best":[39,75],"model":[40,51,92,168,187],"However,":[44],"merely":[46],"uses":[47],"T5":[49],"language":[50],"text":[53],"processing,":[54],"which":[55,146],"cannot":[56],"ensure":[57],"learning":[58],"semantic":[60,105],"information":[61],"text.":[64],"Furthermore,":[65],"Efficient":[67],"UNet":[68,142],"leveraged":[69],"by":[70,169],"is":[72],"not":[73],"choice":[76],"processing.":[79],"To":[80],"address":[81,148],"these":[82],"issues,":[83],"we":[84,137],"propose":[85],"Swinv2-Imagen,":[87],"novel":[89],"based":[93],"on":[94],"Hierarchical":[96],"Visual":[97],"Transformer":[98],"Scene":[101],"Graph":[102],"incorporating":[103],"layout.":[106],"In":[107],"proposed":[109,167,185],"model,":[110,125],"feature":[112],"vectors":[113],"entities":[115],"relationships":[117],"are":[118,159],"extracted":[119],"involved":[121],"effectively":[126],"improving":[127],"quality":[129],"generated":[131],"images.":[132],"On":[133],"top":[134],"that,":[136],"also":[138],"introduce":[139],"Swin-Transformer-based":[141],"architecture,":[143],"called":[144],"Swinv2-Unet,":[145],"can":[147],"problems":[150],"stemming":[151],"from":[152],"CNN":[154],"convolution":[155],"operations.":[156],"Extensive":[157],"experiments":[158],"conducted":[160],"evaluate":[162],"performance":[164],"using":[170],"three":[171],"real-world":[172],"datasets,":[173],"i.e.":[174],"MSCOCO,":[175],"CUB":[176],"MM-CelebA-HQ.":[178],"The":[179],"experimental":[180],"results":[181],"show":[182],"that":[183],"Swinv2-Imagen":[186],"several":[189],"popular":[190],"state-of-the-art":[191],"methods.":[192]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3}],"updated_date":"2026-05-08T15:41:06.802602","created_date":"2025-10-10T00:00:00"}
