{"id":"https://openalex.org/W4400315661","doi":"https://doi.org/10.1109/isivc61350.2024.10577779","title":"A Comparative Study of Text-to-Image Generative Models","display_name":"A Comparative Study of Text-to-Image Generative Models","publication_year":2024,"publication_date":"2024-05-21","ids":{"openalex":"https://openalex.org/W4400315661","doi":"https://doi.org/10.1109/isivc61350.2024.10577779"},"language":"en","primary_location":{"id":"doi:10.1109/isivc61350.2024.10577779","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isivc61350.2024.10577779","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 12th International Symposium on Signal, Image, Video and Communications (ISIVC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017364044","display_name":"Imran Shafiq Ahmad","orcid":null},"institutions":[{"id":"https://openalex.org/I74413500","display_name":"University of Windsor","ror":"https://ror.org/01gw3d370","country_code":"CA","type":"education","lineage":["https://openalex.org/I74413500"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Imran Shafiq Ahmad","raw_affiliation_strings":["University of Windsor,School of Computer Science,Windsor,ON,Canada,N9B 3P4"],"affiliations":[{"raw_affiliation_string":"University of Windsor,School of Computer Science,Windsor,ON,Canada,N9B 3P4","institution_ids":["https://openalex.org/I74413500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102380263","display_name":"Nazia Siddiqui","orcid":null},"institutions":[{"id":"https://openalex.org/I74413500","display_name":"University of Windsor","ror":"https://ror.org/01gw3d370","country_code":"CA","type":"education","lineage":["https://openalex.org/I74413500"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Nazia Siddiqui","raw_affiliation_strings":["University of Windsor,School of Computer Science,Windsor,ON,Canada,N9B 3P4"],"affiliations":[{"raw_affiliation_string":"University of Windsor,School of Computer Science,Windsor,ON,Canada,N9B 3P4","institution_ids":["https://openalex.org/I74413500"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062831830","display_name":"Boubakeur Boufama","orcid":"https://orcid.org/0000-0003-0117-5614"},"institutions":[{"id":"https://openalex.org/I74413500","display_name":"University of Windsor","ror":"https://ror.org/01gw3d370","country_code":"CA","type":"education","lineage":["https://openalex.org/I74413500"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Boubakeur Boufama","raw_affiliation_strings":["University of Windsor,School of Computer Science,Windsor,ON,Canada,N9B 3P4"],"affiliations":[{"raw_affiliation_string":"University of Windsor,School of Computer Science,Windsor,ON,Canada,N9B 3P4","institution_ids":["https://openalex.org/I74413500"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5017364044"],"corresponding_institution_ids":["https://openalex.org/I74413500"],"apc_list":null,"apc_paid":null,"fwci":0.2787,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.54219144,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12698","display_name":"3D Modeling in Geospatial Applications","score":0.7910000085830688,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12698","display_name":"3D Modeling in Geospatial Applications","score":0.7910000085830688,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7196738719940186},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.624234676361084},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.49479907751083374},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4913288652896881},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49090415239334106}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7196738719940186},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.624234676361084},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49479907751083374},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4913288652896881},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49090415239334106}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isivc61350.2024.10577779","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isivc61350.2024.10577779","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 12th International Symposium on Signal, Image, Video and Communications (ISIVC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1909320841","https://openalex.org/W2951523806","https://openalex.org/W2963143316","https://openalex.org/W2982450728","https://openalex.org/W3036167779","https://openalex.org/W3121972911","https://openalex.org/W3137092581","https://openalex.org/W3162926177","https://openalex.org/W4226125322","https://openalex.org/W4281485151","https://openalex.org/W4387195417","https://openalex.org/W6621378261","https://openalex.org/W6639118987","https://openalex.org/W6640963894","https://openalex.org/W6679045638","https://openalex.org/W6683074461","https://openalex.org/W6713645886","https://openalex.org/W6747733185","https://openalex.org/W6779823529","https://openalex.org/W6790978476","https://openalex.org/W6795288823","https://openalex.org/W6810940779","https://openalex.org/W6838639034"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2380075625","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"of":[2,30,36,59,76,106,118,147,158,167,181,197,207,244,281,304],"deep":[3],"learning":[4,90,201],"(DL)":[5],"techniques":[6,203],"have":[7],"revolutionized":[8],"various":[9],"fields":[10],"such":[11],"as":[12,267],"computer":[13],"vision,":[14],"image":[15,37,291],"processing,":[16],"artificial":[17],"intelligence,":[18],"and":[19,88,137,145,152,165,185,220,254],"natural":[20],"language":[21],"processing.":[22],"One":[23],"notable":[24],"application":[25],"that":[26,79],"showcases":[27],"the":[28,34,57,67,104,119,156,163,205,224,237,279,282,286,290,305,313],"power":[29],"these":[31],"algorithms":[32],"is":[33,98,114,223,241,247,264,296],"field":[35],"synthesis,":[38],"where":[39,103],"new":[40,132],"images":[41,209,246,284,307],"are":[42,72,177],"created":[43],"from":[44,66,210,236],"textual":[45,126,211],"descriptions.":[46],"Generative":[47],"models":[48,71,78,154,169],"play":[49],"a":[50,73,115,143,179,194,268],"crucial":[51],"role":[52],"in":[53,101,170,228],"this":[54,93,174,229],"process,":[55],"enabling":[56],"generation":[58,120,292],"novel":[60],"data":[61,107,239],"based":[62],"on":[63],"patterns":[64],"learned":[65],"training":[68],"set.":[69],"Diffusion":[70,217],"distinctive":[74],"class":[75],"generative":[77,148],"operate":[80],"by":[81,277],"introducing":[82],"random":[83],"noise":[84],"to":[85,91,124,161,215,270,285,300],"existing":[86],"data,":[87],"subsequently":[89],"reverse":[92],"diffusion":[94,153,225],"process.":[95,121,293],"This":[96,140,191,294],"technique":[97],"particularly":[99],"valuable":[100],"scenarios":[102],"transformation":[105],"over":[108],"time":[109],"or":[110],"through":[111],"sequential":[112],"steps":[113],"critical":[116],"aspect":[117],"The":[122],"ability":[123],"translate":[125],"descriptions":[127],"into":[128],"visual":[129,302],"representations":[130],"offers":[131],"possibilities":[133],"for":[134,204,218,275],"human-computer":[135],"interaction":[136],"creative":[138],"expressions.":[139],"paper":[141],"provides":[142,193],"comparison":[144],"analysis":[146,276],"adversarial":[149],"networks":[150],"(GANs)":[151],"within":[155],"domain":[157],"\u201ctext-to-image":[159],"generation\u201d":[160],"understand":[162],"strength":[164],"weaknesses":[166],"different":[168],"specific":[171],"contexts.":[172],"For":[173,231],"purpose,":[175],"we":[176],"using":[178,249],"combination":[180,192],"Vector-Quantized":[182],"GAN":[183],"(VQGAN)":[184],"Contrastive":[186],"Language-Image":[187],"Pre-training":[188],"(CLIP)":[189],"model.":[190],"powerful":[195],"integration":[196],"two":[198],"distinct":[199],"machine":[200],"(ML)":[202],"purpose":[206],"creating":[208],"input.":[212],"Guided":[213],"Language":[214],"Image":[216],"Generation":[219],"Editing":[221],"(GLIDE)":[222],"model":[226],"used":[227,266],"study.":[230],"both":[232],"models,":[233],"text":[234],"input":[235],"MS-COCO":[238],"set":[240],"used.":[242],"Evaluation":[243],"generated":[245,283,306],"performed":[248],"Fr\u00e9chet":[250],"Inception":[251,255],"Distance":[252],"(FID)":[253],"Score":[256],"(IS)":[257],"metrics.":[258],"Semantic":[259],"object":[260],"accuracy":[261],"score":[262],"(SOA)":[263],"also":[265,309],"metric":[269,295],"add":[271],"an":[272],"additional":[273],"layer":[274],"considering":[278],"relevance":[280],"provided":[287],"captions":[288],"during":[289],"helpful":[297],"not":[298],"only":[299],"assessing":[301],"quality":[303],"but":[308],"their":[310],"alignment":[311],"with":[312],"intended":[314],"semantic":[315],"content.":[316]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
