{"id":"https://openalex.org/W4417290885","doi":"https://doi.org/10.1016/j.neucom.2025.132385","title":"Pixels to prose: A comprehensive survey of image captioning techniques with deep learning and generative artificial intelligence","display_name":"Pixels to prose: A comprehensive survey of image captioning techniques with deep learning and generative artificial intelligence","publication_year":2025,"publication_date":"2025-12-13","ids":{"openalex":"https://openalex.org/W4417290885","doi":"https://doi.org/10.1016/j.neucom.2025.132385"},"language":"en","primary_location":{"id":"doi:10.1016/j.neucom.2025.132385","is_oa":false,"landing_page_url":"https://doi.org/10.1016/j.neucom.2025.132385","pdf_url":null,"source":{"id":"https://openalex.org/S45693802","display_name":"Neurocomputing","issn_l":"0925-2312","issn":["0925-2312","1872-8286"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neurocomputing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035867732","display_name":"Aarti Sharma","orcid":null},"institutions":[{"id":"https://openalex.org/I154851008","display_name":"Indian Institute of Technology Roorkee","ror":"https://ror.org/00582g326","country_code":"IN","type":"education","lineage":["https://openalex.org/I154851008"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Aarti Sharma","raw_affiliation_strings":["Department of Applied Mathematics & Scientific Computing, Indian Institute of Technology, Roorkee, India"],"affiliations":[{"raw_affiliation_string":"Department of Applied Mathematics & Scientific Computing, Indian Institute of Technology, Roorkee, India","institution_ids":["https://openalex.org/I154851008"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084086661","display_name":"Hrishikesh Singh","orcid":null},"institutions":[{"id":"https://openalex.org/I154851008","display_name":"Indian Institute of Technology Roorkee","ror":"https://ror.org/00582g326","country_code":"IN","type":"education","lineage":["https://openalex.org/I154851008"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Hrishikesh Singh","raw_affiliation_strings":["Department of Applied Mathematics & Scientific Computing, Indian Institute of Technology, Roorkee, India"],"affiliations":[{"raw_affiliation_string":"Department of Applied Mathematics & Scientific Computing, Indian Institute of Technology, Roorkee, India","institution_ids":["https://openalex.org/I154851008"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081953081","display_name":"Millie Pant","orcid":"https://orcid.org/0000-0002-7668-7887"},"institutions":[{"id":"https://openalex.org/I154851008","display_name":"Indian Institute of Technology Roorkee","ror":"https://ror.org/00582g326","country_code":"IN","type":"education","lineage":["https://openalex.org/I154851008"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Millie Pant","raw_affiliation_strings":["Department of Applied Mathematics & Scientific Computing, Indian Institute of Technology, Roorkee, India","Mehta Family School of Data Science & Artificial Intelligence, Indian Institute of Technology, Roorkee, India"],"affiliations":[{"raw_affiliation_string":"Department of Applied Mathematics & Scientific Computing, Indian Institute of Technology, Roorkee, India","institution_ids":["https://openalex.org/I154851008"]},{"raw_affiliation_string":"Mehta Family School of Data Science & Artificial Intelligence, Indian Institute of Technology, Roorkee, India","institution_ids":["https://openalex.org/I154851008"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5035867732"],"corresponding_institution_ids":["https://openalex.org/I154851008"],"apc_list":{"value":2470,"currency":"USD","value_usd":2470},"apc_paid":null,"fwci":1.1573,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.85047749,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"667","issue":null,"first_page":"132385","last_page":"132385"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9850000143051147,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9850000143051147,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.0026000000070780516,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.0012000000569969416,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9945999979972839},{"id":"https://openalex.org/keywords/intersection","display_name":"Intersection (aeronautics)","score":0.54830002784729},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5236999988555908},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5142999887466431},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4968000054359436},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.44690001010894775}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9945999979972839},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8306999802589417},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.682699978351593},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.54830002784729},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5236999988555908},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5142999887466431},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4968000054359436},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.44690001010894775},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4250999987125397},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.42089998722076416},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3763999938964844},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.34610000252723694},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.34220001101493835},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2775999903678894},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.274399995803833},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2524999976158142},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.25}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.neucom.2025.132385","is_oa":false,"landing_page_url":"https://doi.org/10.1016/j.neucom.2025.132385","pdf_url":null,"source":{"id":"https://openalex.org/S45693802","display_name":"Neurocomputing","issn_l":"0925-2312","issn":["0925-2312","1872-8286"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neurocomputing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322724","display_name":"Ministry of Education, India","ror":"https://ror.org/048xjjh50"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":70,"referenced_works":["https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1947481528","https://openalex.org/W1956340063","https://openalex.org/W2277195237","https://openalex.org/W2463955103","https://openalex.org/W2560645892","https://openalex.org/W2575842049","https://openalex.org/W2745461083","https://openalex.org/W2886641317","https://openalex.org/W2890531016","https://openalex.org/W2896348597","https://openalex.org/W2904565150","https://openalex.org/W2904993015","https://openalex.org/W2910121883","https://openalex.org/W2954841306","https://openalex.org/W2963062932","https://openalex.org/W2963101956","https://openalex.org/W2963109634","https://openalex.org/W2963649796","https://openalex.org/W2963686907","https://openalex.org/W2964018924","https://openalex.org/W2970231061","https://openalex.org/W2979739834","https://openalex.org/W2979861699","https://openalex.org/W2982260276","https://openalex.org/W2989176720","https://openalex.org/W2992478697","https://openalex.org/W2997248215","https://openalex.org/W3002225283","https://openalex.org/W3024761859","https://openalex.org/W3034655362","https://openalex.org/W3034984754","https://openalex.org/W3035160838","https://openalex.org/W3035284526","https://openalex.org/W3042724941","https://openalex.org/W3046675509","https://openalex.org/W3080764280","https://openalex.org/W3089915566","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3106859150","https://openalex.org/W3119689583","https://openalex.org/W3138516171","https://openalex.org/W3146366485","https://openalex.org/W3164654615","https://openalex.org/W3173220247","https://openalex.org/W3174042809","https://openalex.org/W3176481196","https://openalex.org/W3184784418","https://openalex.org/W3195680250","https://openalex.org/W3205625102","https://openalex.org/W3208624098","https://openalex.org/W3210420162","https://openalex.org/W3217347476","https://openalex.org/W4200498145","https://openalex.org/W4281686064","https://openalex.org/W4283216168","https://openalex.org/W4289654392","https://openalex.org/W4308727252","https://openalex.org/W4312784228","https://openalex.org/W4312983485","https://openalex.org/W4315488064","https://openalex.org/W4385762291","https://openalex.org/W4386162736","https://openalex.org/W4387132565","https://openalex.org/W4395041667","https://openalex.org/W4409974385","https://openalex.org/W4413401670","https://openalex.org/W4416922317"],"related_works":[],"abstract_inverted_index":null,"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-22T08:38:42.863108","created_date":"2025-12-13T00:00:00"}
