{"id":"https://openalex.org/W4404640582","doi":"https://doi.org/10.3390/computers13120305","title":"Novel Advance Image Caption Generation Utilizing Vision Transformer and Generative Adversarial Networks","display_name":"Novel Advance Image Caption Generation Utilizing Vision Transformer and Generative Adversarial Networks","publication_year":2024,"publication_date":"2024-11-22","ids":{"openalex":"https://openalex.org/W4404640582","doi":"https://doi.org/10.3390/computers13120305"},"language":"en","primary_location":{"id":"doi:10.3390/computers13120305","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers13120305","pdf_url":null,"source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.3390/computers13120305","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010651301","display_name":"Sachin Tyagi","orcid":"https://orcid.org/0009-0008-3475-128X"},"institutions":[{"id":"https://openalex.org/I4210114149","display_name":"KR Mangalam University","ror":"https://ror.org/026b9sf88","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210114149"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shourya Tyagi","raw_affiliation_strings":["Department of Computer Science & Engineering, School of Engineering and Technology, K. R. Mangalam University, Gurugram 122103, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Engineering, School of Engineering and Technology, K. R. Mangalam University, Gurugram 122103, India","institution_ids":["https://openalex.org/I4210114149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040601176","display_name":"Olukayode Oki","orcid":"https://orcid.org/0000-0002-6887-9782"},"institutions":[{"id":"https://openalex.org/I92579986","display_name":"Walter Sisulu University","ror":"https://ror.org/02svzjn28","country_code":"ZA","type":"education","lineage":["https://openalex.org/I92579986"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Olukayode Ayodele Oki","raw_affiliation_strings":["Information Technology Department, Walter Sisulu University, Mthatha 5117, South Africa"],"affiliations":[{"raw_affiliation_string":"Information Technology Department, Walter Sisulu University, Mthatha 5117, South Africa","institution_ids":["https://openalex.org/I92579986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055908806","display_name":"Vineet Verma","orcid":"https://orcid.org/0000-0001-8106-9496"},"institutions":[{"id":"https://openalex.org/I4210114149","display_name":"KR Mangalam University","ror":"https://ror.org/026b9sf88","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210114149"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vineet Verma","raw_affiliation_strings":["Department of Computer Science & Engineering, School of Engineering and Technology, K. R. Mangalam University, Gurugram 122103, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Engineering, School of Engineering and Technology, K. R. Mangalam University, Gurugram 122103, India","institution_ids":["https://openalex.org/I4210114149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101778338","display_name":"Swati Gupta","orcid":"https://orcid.org/0000-0002-9450-6381"},"institutions":[{"id":"https://openalex.org/I4210114149","display_name":"KR Mangalam University","ror":"https://ror.org/026b9sf88","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210114149"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Swati Gupta","raw_affiliation_strings":["Department of Computer Science & Engineering, Member of Centre of Excellence AI, School of Engineering and Technology, K. R. Mangalam University, Gurugram 122103, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Engineering, Member of Centre of Excellence AI, School of Engineering and Technology, K. R. Mangalam University, Gurugram 122103, India","institution_ids":["https://openalex.org/I4210114149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022920736","display_name":"Meenu Vijarania","orcid":"https://orcid.org/0000-0003-0206-5927"},"institutions":[{"id":"https://openalex.org/I4210114149","display_name":"KR Mangalam University","ror":"https://ror.org/026b9sf88","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210114149"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Meenu Vijarania","raw_affiliation_strings":["Department of Computer Science & Engineering, Member of Centre of Excellence AI, School of Engineering and Technology, K. R. Mangalam University, Gurugram 122103, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Engineering, Member of Centre of Excellence AI, School of Engineering and Technology, K. R. Mangalam University, Gurugram 122103, India","institution_ids":["https://openalex.org/I4210114149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085915629","display_name":"Joseph Bamidele Awotunde","orcid":null},"institutions":[{"id":"https://openalex.org/I134542131","display_name":"University of Ilorin","ror":"https://ror.org/032kdwk38","country_code":"NG","type":"education","lineage":["https://openalex.org/I134542131"]}],"countries":["NG"],"is_corresponding":true,"raw_author_name":"Joseph Bamidele Awotunde","raw_affiliation_strings":["Department of Computer Science, Faculty of Information and Communication Sciences, University of Ilorin, Ilorin 240003, Nigeria"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Faculty of Information and Communication Sciences, University of Ilorin, Ilorin 240003, Nigeria","institution_ids":["https://openalex.org/I134542131"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037860157","display_name":"Abdulrauph Olanrewaju Babatunde","orcid":"https://orcid.org/0000-0003-3247-0480"},"institutions":[{"id":"https://openalex.org/I134542131","display_name":"University of Ilorin","ror":"https://ror.org/032kdwk38","country_code":"NG","type":"education","lineage":["https://openalex.org/I134542131"]}],"countries":["NG"],"is_corresponding":false,"raw_author_name":"Abdulrauph Olanrewaju Babatunde","raw_affiliation_strings":["Department of Computer Science, Faculty of Information and Communication Sciences, University of Ilorin, Ilorin 240003, Nigeria"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Faculty of Information and Communication Sciences, University of Ilorin, Ilorin 240003, Nigeria","institution_ids":["https://openalex.org/I134542131"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5022920736","https://openalex.org/A5085915629"],"corresponding_institution_ids":["https://openalex.org/I134542131","https://openalex.org/I4210114149"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":1.2798,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.82096566,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"13","issue":"12","first_page":"305","last_page":"305"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.900580644607544},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8784377574920654},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6687124967575073},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5803920030593872},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5625928640365601},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.5269960761070251},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.412982314825058},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3848083019256592},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36683404445648193},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33313849568367004},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3316471576690674}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.900580644607544},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8784377574920654},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6687124967575073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5803920030593872},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5625928640365601},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.5269960761070251},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.412982314825058},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3848083019256592},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36683404445648193},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33313849568367004},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3316471576690674},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/computers13120305","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers13120305","pdf_url":null,"source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:e4f60175ea9d41c0be88419c3c23135e","is_oa":true,"landing_page_url":"https://doaj.org/article/e4f60175ea9d41c0be88419c3c23135e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computers, Vol 13, Iss 12, p 305 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/computers13120305","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers13120305","pdf_url":null,"source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4399999976158142,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1956340063","https://openalex.org/W2064675550","https://openalex.org/W2146502635","https://openalex.org/W2185175083","https://openalex.org/W2242218935","https://openalex.org/W2277195237","https://openalex.org/W2613332842","https://openalex.org/W2739748921","https://openalex.org/W2902887378","https://openalex.org/W2903049941","https://openalex.org/W2945870855","https://openalex.org/W3089973229","https://openalex.org/W3101313921","https://openalex.org/W3158091094","https://openalex.org/W3195346735","https://openalex.org/W4297371741","https://openalex.org/W4320013936","https://openalex.org/W4366393884","https://openalex.org/W4386162736","https://openalex.org/W4387451388","https://openalex.org/W4392903201","https://openalex.org/W4394910749","https://openalex.org/W4399528683","https://openalex.org/W4402422583","https://openalex.org/W4404420415","https://openalex.org/W6681435938","https://openalex.org/W6777254788"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4310447809","https://openalex.org/W4200243030","https://openalex.org/W2800782462","https://openalex.org/W3209117276","https://openalex.org/W4388184981","https://openalex.org/W4323777661","https://openalex.org/W4324290231"],"abstract_inverted_index":{"In":[0],"this":[1,145],"paper,":[2],"we":[3,36],"propose":[4],"a":[5,81,209,221,250],"novel":[6],"method":[7],"for":[8,129],"producing":[9],"image":[10,49,246],"captions":[11],"through":[12],"the":[13,38,43,47,52,58,86,102,106,114,121,134,169,174,184,214,226,238,254],"utilization":[14],"of":[15,42,51,75,105,136,154,173,186,200,216,229,240],"Generative":[16],"Adversarial":[17],"Networks":[18],"(GANs)":[19],"and":[20,31,92,98,157,171,197,231],"Vision":[21],"Transformers":[22],"(ViTs)":[23],"using":[24,71,80,101],"our":[25,62,137,147,162,187,206],"proposed":[26,63,138],"Image":[27],"Captioning":[28],"Utilizing":[29],"Transformer":[30],"GAN":[32],"(ICTGAN)":[33],"model.":[34,139],"Here":[35],"use":[37],"efficient":[39],"representation":[40],"learning":[41],"ViTs":[44],"to":[45,88,132,166],"improve":[46],"realistic":[48],"production":[50],"GAN.":[53],"Using":[54],"textual":[55,97],"features":[56,76],"from":[57,69,95],"LSTM-based":[59],"language":[60],"model,":[61],"model":[64,87,163],"combines":[65],"salient":[66],"information":[67],"extracted":[68],"images":[70],"ViTs.":[72],"This":[73,235],"merging":[74],"is":[77,164,242],"made":[78],"possible":[79,244],"self-attention":[82,103,107,233],"mechanism,":[83],"which":[84,124,189],"enables":[85],"efficiently":[89],"take":[90],"in":[91,152,168,213,245,253],"process":[93],"data":[94],"both":[96],"visual":[99],"sources":[100],"properties":[104],"mechanism.":[108],"We":[109,181],"perform":[110],"various":[111],"tests":[112],"on":[113,144],"MS":[115],"COCO":[116],"dataset":[117],"as":[118,120],"well":[119],"Flickr30k":[122],"dataset,":[123,146],"are":[125],"popular":[126],"benchmark":[127],"datasets":[128],"image-captioning":[130],"tasks,":[131],"verify":[133],"effectiveness":[135],"The":[140],"outcomes":[141],"represent":[142],"that,":[143],"algorithm":[148],"outperforms":[149],"other":[150],"approaches":[151],"terms":[153],"relevance,":[155],"diversity,":[156],"caption":[158,195,218,247],"quality.":[159],"With":[160],"this,":[161],"robust":[165],"changes":[167],"content":[170],"style":[172],"images,":[175],"demonstrating":[176],"its":[177],"excellent":[178],"generalization":[179],"skills.":[180],"also":[182],"explain":[183],"benefits":[185],"method,":[188],"include":[190],"better":[191,194,198],"visual\u2013textual":[192],"alignment,":[193],"coherence,":[196],"handling":[199],"complicated":[201],"scenarios.":[202],"All":[203],"things":[204],"considered,":[205],"work":[207,236],"represents":[208],"significant":[210],"step":[211],"forward":[212],"field":[215],"picture":[217],"creation,":[219],"offering":[220],"complete":[222],"solution":[223],"that":[224],"leverages":[225],"complementary":[227],"advantages":[228],"GANs":[230],"ViT-based":[232],"models.":[234],"pushes":[237],"limits":[239],"what":[241],"currently":[243],"generation,":[248],"creating":[249],"new":[251],"standard":[252],"industry.":[255]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-17T09:09:15.849793","created_date":"2025-10-10T00:00:00"}
