{"id":"https://openalex.org/W4391097126","doi":"https://doi.org/10.1109/access.2024.3356551","title":"A Study of ConvNeXt Architectures for Enhanced Image Captioning","display_name":"A Study of ConvNeXt Architectures for Enhanced Image Captioning","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391097126","doi":"https://doi.org/10.1109/access.2024.3356551"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3356551","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3356551","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10410861.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10410861.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000019015","display_name":"Leo Ramos","orcid":"https://orcid.org/0000-0001-7107-7943"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leo Ramos","raw_affiliation_strings":["Kauel Inc., Menlo Park, Silicon Valley, CA, USA"],"raw_orcid":"https://orcid.org/0000-0001-7107-7943","affiliations":[{"raw_affiliation_string":"Kauel Inc., Menlo Park, Silicon Valley, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053320427","display_name":"Edmundo Casas","orcid":"https://orcid.org/0000-0003-2704-0670"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Edmundo Casas","raw_affiliation_strings":["Kauel Inc., Menlo Park, Silicon Valley, CA, USA"],"raw_orcid":"https://orcid.org/0000-0003-2704-0670","affiliations":[{"raw_affiliation_string":"Kauel Inc., Menlo Park, Silicon Valley, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038415919","display_name":"Cristian Romero","orcid":"https://orcid.org/0009-0009-0930-2535"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cristian Romero","raw_affiliation_strings":["Kauel Inc., Menlo Park, Silicon Valley, CA, USA"],"raw_orcid":"https://orcid.org/0009-0009-0930-2535","affiliations":[{"raw_affiliation_string":"Kauel Inc., Menlo Park, Silicon Valley, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084104510","display_name":"Francklin Rivas","orcid":"https://orcid.org/0000-0002-5201-2877"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Francklin Rivas-Echeverr\u00eda","raw_affiliation_strings":["Kauel Inc., Menlo Park, Silicon Valley, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-5201-2877","affiliations":[{"raw_affiliation_string":"Kauel Inc., Menlo Park, Silicon Valley, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088350698","display_name":"Manuel Eugenio Morocho-Cayamcela","orcid":"https://orcid.org/0000-0002-4705-7923"},"institutions":[{"id":"https://openalex.org/I3130401523","display_name":"Universidad Yachay Tech","ror":"https://ror.org/04jjswc10","country_code":"EC","type":"education","lineage":["https://openalex.org/I3130401523"]}],"countries":["EC"],"is_corresponding":false,"raw_author_name":"Manuel Eugenio Morocho-Cayamcela","raw_affiliation_strings":["School of Mathematical and Computational Sciences, Yachay Tech University, Urcuqu&#x00ED;, Ecuador"],"raw_orcid":"https://orcid.org/0000-0002-4705-7923","affiliations":[{"raw_affiliation_string":"School of Mathematical and Computational Sciences, Yachay Tech University, Urcuqu&#x00ED;, Ecuador","institution_ids":["https://openalex.org/I3130401523"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":9.1866,"has_fulltext":true,"cited_by_count":42,"citation_normalized_percentile":{"value":0.98640748,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"12","issue":null,"first_page":"13711","last_page":"13728"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9402285814285278},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8642151355743408},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7034430503845215},{"id":"https://openalex.org/keywords/residual-neural-network","display_name":"Residual neural network","score":0.6035544276237488},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5391116738319397},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5072700381278992},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4670844078063965},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3907480239868164},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.38072726130485535},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3711257576942444},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3695281445980072},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.35557928681373596},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3533105254173279},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.10567831993103027}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9402285814285278},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8642151355743408},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7034430503845215},{"id":"https://openalex.org/C2944601119","wikidata":"https://www.wikidata.org/wiki/Q43744058","display_name":"Residual neural network","level":3,"score":0.6035544276237488},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5391116738319397},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5072700381278992},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4670844078063965},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3907480239868164},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.38072726130485535},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3711257576942444},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3695281445980072},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.35557928681373596},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3533105254173279},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.10567831993103027},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2024.3356551","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3356551","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10410861.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:98f9f35886c346faaa431208d3718d55","is_oa":true,"landing_page_url":"https://doaj.org/article/98f9f35886c346faaa431208d3718d55","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 13711-13728 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3356551","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3356551","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10410861.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4391097126.pdf","grobid_xml":"https://content.openalex.org/works/W4391097126.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2481240925","https://openalex.org/W2549139847","https://openalex.org/W2892093496","https://openalex.org/W2896348597","https://openalex.org/W2945415645","https://openalex.org/W2964195121","https://openalex.org/W2975690918","https://openalex.org/W2986670728","https://openalex.org/W2990138404","https://openalex.org/W3007844454","https://openalex.org/W3016108562","https://openalex.org/W3023584964","https://openalex.org/W3037371133","https://openalex.org/W3040045570","https://openalex.org/W3040211378","https://openalex.org/W3064428683","https://openalex.org/W3093225449","https://openalex.org/W3114308765","https://openalex.org/W3117335323","https://openalex.org/W3138516171","https://openalex.org/W3173146464","https://openalex.org/W3174012740","https://openalex.org/W3198730297","https://openalex.org/W3217347476","https://openalex.org/W4206706211","https://openalex.org/W4220798687","https://openalex.org/W4282962858","https://openalex.org/W4292364243","https://openalex.org/W4312443924","https://openalex.org/W4366123788","https://openalex.org/W4386436010","https://openalex.org/W6682132143","https://openalex.org/W6763643401","https://openalex.org/W6794345597"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4310447809","https://openalex.org/W4200243030","https://openalex.org/W2800782462","https://openalex.org/W3209117276","https://openalex.org/W4323777661","https://openalex.org/W4388184981"],"abstract_inverted_index":{"This":[0],"study":[1],"explores":[2],"the":[3,6,15,57,63],"effectiveness":[4],"of":[5,17,65,91,103,165],"ConvNeXt":[7,22,48,92,106,125,159],"model,":[8],"an":[9],"advanced":[10],"computer":[11],"vision":[12,130],"architecture,":[13],"in":[14,93,141],"task":[16],"image":[18,94,134],"captioning.":[19],"We":[20],"integrated":[21],"with":[23,79,121,146],"a":[24,31],"Long":[25],"Short-Term":[26],"Memory":[27],"network":[28],"that":[29],"includes":[30],"visual":[32],"attention":[33],"module,":[34],"focusing":[35],"on":[36,129],"assessing":[37],"its":[38],"performance":[39,99],"across":[40],"different":[41,53],"scenarios.":[42],"Experiments":[43],"were":[44,60],"conducted":[45],"using":[46,114,148],"various":[47],"versions":[49],"for":[50,112,119],"feature":[51],"extraction,":[52],"learning":[54],"rates":[55],"during":[56],"training":[58],"phase":[59],"tested,":[61],"and":[62,82,116,132,138,156,170,172,180],"impact":[64],"including":[66],"or":[67],"excluding":[68],"teacher-forcing":[69],"was":[70,77],"analyzed.":[71],"The":[72,89],"MS":[73],"COCO":[74],"2014":[75],"dataset":[76],"employed,":[78],"top-5":[80,162],"accuracy":[81,163],"BLEU-n":[83],"metrics":[84],"used":[85],"to":[86],"evaluate":[87],"performance.":[88],"implementation":[90],"captioning":[95],"systems":[96,147],"reveals":[97],"notable":[98],"enhancements.":[100],"In":[101],"terms":[102],"BLEU-4":[104,142],"scores,":[105],"outperformed":[107],"existing":[108],"benchmarks":[109],"by":[110,117,136,175],"43.04%":[111],"models":[113,127],"soft-attention":[115],"39.04%":[118],"those":[120],"hard-attention":[122],"mechanisms.":[123],"Furthermore,":[124],"surpassed":[126],"based":[128],"transformers":[131,135],"data-efficient":[133],"4.57%":[137],"0.93%,":[139],"respectively,":[140],"scores.":[143],"When":[144],"compared":[145],"encoders":[149],"such":[150],"as":[151],"ResNet-101,":[152],"ResNet-152,":[153],"VGG-16,":[154],"ResNeXt-101,":[155],"MobileNet":[157],"V3,":[158],"achieved":[160],"higher":[161],"improvements":[164],"6.44%,":[166],"6.46%,":[167],"6.47%,":[168],"6.39%,":[169],"6.68%,":[171],"reduced":[173],"loss":[174],"18.46%,":[176,178],"18.44%,":[177],"18.24%,":[179],"18.72%,":[181],"respectively.":[182]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":14}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
