{"id":"https://openalex.org/W4386162736","doi":"https://doi.org/10.1145/3617592","title":"Deep Learning Approaches on Image Captioning: A Review","display_name":"Deep Learning Approaches on Image Captioning: A Review","publication_year":2023,"publication_date":"2023-08-25","ids":{"openalex":"https://openalex.org/W4386162736","doi":"https://doi.org/10.1145/3617592"},"language":"en","primary_location":{"id":"doi:10.1145/3617592","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3617592","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"type":"review","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070753126","display_name":"Taraneh Ghandi","orcid":"https://orcid.org/0000-0003-4561-827X"},"institutions":[{"id":"https://openalex.org/I98251732","display_name":"McMaster University","ror":"https://ror.org/02fa3aq29","country_code":"CA","type":"education","lineage":["https://openalex.org/I98251732"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Taraneh Ghandi","raw_affiliation_strings":["McMaster University, Canada"],"affiliations":[{"raw_affiliation_string":"McMaster University, Canada","institution_ids":["https://openalex.org/I98251732"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040672682","display_name":"Hamid Reza Pourreza","orcid":"https://orcid.org/0000-0002-3560-8070"},"institutions":[{"id":"https://openalex.org/I86958956","display_name":"Ferdowsi University of Mashhad","ror":"https://ror.org/00g6ka752","country_code":"IR","type":"education","lineage":["https://openalex.org/I86958956"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Hamidreza Pourreza","raw_affiliation_strings":["Ferdowsi University of Mashhad, Iran"],"affiliations":[{"raw_affiliation_string":"Ferdowsi University of Mashhad, Iran","institution_ids":["https://openalex.org/I86958956"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101675523","display_name":"Hamidreza Mahyar","orcid":"https://orcid.org/0000-0003-0397-7258"},"institutions":[{"id":"https://openalex.org/I98251732","display_name":"McMaster University","ror":"https://ror.org/02fa3aq29","country_code":"CA","type":"education","lineage":["https://openalex.org/I98251732"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Hamidreza Mahyar","raw_affiliation_strings":["McMaster University, Canada"],"affiliations":[{"raw_affiliation_string":"McMaster University, Canada","institution_ids":["https://openalex.org/I98251732"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5070753126"],"corresponding_institution_ids":["https://openalex.org/I98251732"],"apc_list":null,"apc_paid":null,"fwci":17.8901,"has_fulltext":false,"cited_by_count":150,"citation_normalized_percentile":{"value":0.99570563,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"56","issue":"3","first_page":"1","last_page":"39"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9769672751426697},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8822090029716492},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6630417108535767},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6424086093902588},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5307735800743103},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5105577707290649},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5042687654495239},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46409741044044495},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3546932339668274},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3352031707763672}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9769672751426697},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8822090029716492},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6630417108535767},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6424086093902588},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5307735800743103},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5105577707290649},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5042687654495239},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46409741044044495},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3546932339668274},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3352031707763672},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3617592","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3617592","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7699999809265137}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":138,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W1593271688","https://openalex.org/W1686810756","https://openalex.org/W1895577753","https://openalex.org/W1899504021","https://openalex.org/W1956340063","https://openalex.org/W2064675550","https://openalex.org/W2108598243","https://openalex.org/W2116341502","https://openalex.org/W2125389028","https://openalex.org/W2157331557","https://openalex.org/W2183341477","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2277195237","https://openalex.org/W2302086703","https://openalex.org/W2506483933","https://openalex.org/W2552161745","https://openalex.org/W2552839021","https://openalex.org/W2558333741","https://openalex.org/W2558834163","https://openalex.org/W2559655401","https://openalex.org/W2568262903","https://openalex.org/W2578190051","https://openalex.org/W2579549467","https://openalex.org/W2591644541","https://openalex.org/W2600463316","https://openalex.org/W2604178507","https://openalex.org/W2607662938","https://openalex.org/W2607855566","https://openalex.org/W2612690371","https://openalex.org/W2625940279","https://openalex.org/W2745461083","https://openalex.org/W2754927243","https://openalex.org/W2803206166","https://openalex.org/W2807697862","https://openalex.org/W2885013662","https://openalex.org/W2886641317","https://openalex.org/W2886970679","https://openalex.org/W2890531016","https://openalex.org/W2896348597","https://openalex.org/W2901988662","https://openalex.org/W2904565150","https://openalex.org/W2904993015","https://openalex.org/W2913059114","https://openalex.org/W2913618459","https://openalex.org/W2949376505","https://openalex.org/W2950096400","https://openalex.org/W2955956881","https://openalex.org/W2962735233","https://openalex.org/W2962793481","https://openalex.org/W2963048642","https://openalex.org/W2963062932","https://openalex.org/W2963084599","https://openalex.org/W2963101956","https://openalex.org/W2963170456","https://openalex.org/W2963201326","https://openalex.org/W2963383024","https://openalex.org/W2963446712","https://openalex.org/W2963536419","https://openalex.org/W2963649796","https://openalex.org/W2963686907","https://openalex.org/W2963743213","https://openalex.org/W2963762755","https://openalex.org/W2963834202","https://openalex.org/W2963938081","https://openalex.org/W2963954913","https://openalex.org/W2964018924","https://openalex.org/W2964080601","https://openalex.org/W2964350391","https://openalex.org/W2964616647","https://openalex.org/W2973978812","https://openalex.org/W2974212192","https://openalex.org/W2979739834","https://openalex.org/W2982260276","https://openalex.org/W2986670728","https://openalex.org/W2989377923","https://openalex.org/W2990818246","https://openalex.org/W2992478697","https://openalex.org/W2997591391","https://openalex.org/W3005983418","https://openalex.org/W3033562610","https://openalex.org/W3034655362","https://openalex.org/W3034984754","https://openalex.org/W3035284526","https://openalex.org/W3091588028","https://openalex.org/W3092462694","https://openalex.org/W3095670406","https://openalex.org/W3096609285","https://openalex.org/W3101313921","https://openalex.org/W3103022576","https://openalex.org/W3104279398","https://openalex.org/W3106859150","https://openalex.org/W3107848485","https://openalex.org/W3110019360","https://openalex.org/W3114308765","https://openalex.org/W3119438769","https://openalex.org/W3124149278","https://openalex.org/W3126988965","https://openalex.org/W3134249459","https://openalex.org/W3138516171","https://openalex.org/W3171125843","https://openalex.org/W3172845016","https://openalex.org/W3173220247","https://openalex.org/W3174476431","https://openalex.org/W3174770825","https://openalex.org/W3185154749","https://openalex.org/W3192652975","https://openalex.org/W3196091473","https://openalex.org/W3196479518","https://openalex.org/W3209274285","https://openalex.org/W4212774754","https://openalex.org/W4213298225","https://openalex.org/W4221147537","https://openalex.org/W4224046780","https://openalex.org/W4225323055","https://openalex.org/W4282968790","https://openalex.org/W4285151156","https://openalex.org/W4285602612","https://openalex.org/W4288083516","https://openalex.org/W4292199884","https://openalex.org/W4292828275","https://openalex.org/W4293387332","https://openalex.org/W4295845549","https://openalex.org/W4304080274","https://openalex.org/W4307106676","https://openalex.org/W4312289196","https://openalex.org/W4312922092","https://openalex.org/W4313131769","https://openalex.org/W4318718936","https://openalex.org/W4323557204","https://openalex.org/W6678815747","https://openalex.org/W6752113587","https://openalex.org/W6789705400","https://openalex.org/W6803567076","https://openalex.org/W6809596087","https://openalex.org/W6809741970","https://openalex.org/W6850822301"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3122720459","https://openalex.org/W4298897568","https://openalex.org/W1938708284","https://openalex.org/W4380190185"],"abstract_inverted_index":{"Image":[0],"captioning":[1,62,84,99],"is":[2],"a":[3,53,65],"research":[4,152],"area":[5],"of":[6,21,26,56,97,189],"immense":[7],"importance,":[8],"aiming":[9],"to":[10,40,93,133,175,184],"generate":[11],"natural":[12],"language":[13],"descriptions":[14],"for":[15,151],"visual":[16],"content":[17],"in":[18,60,73,82,106,153],"the":[19,37,78,89,95,103,141,159,187],"form":[20],"still":[22],"images.":[23],"The":[24],"advent":[25],"deep":[27,57,128],"learning":[28,58,129],"and":[29,44,68,122,165,179],"more":[30,41],"recently":[31],"vision-language":[32,172],"pre-training":[33,173],"techniques":[34],"has":[35],"revolutionized":[36],"field,":[38],"leading":[39],"sophisticated":[42],"methods":[43,59,174],"improved":[45,181],"performance.":[46],"In":[47],"this":[48,107,154],"survey":[49],"article,":[50],"we":[51,76,145],"provide":[52],"structured":[54],"review":[55],"image":[61,83,164,190],"by":[63,109],"presenting":[64],"comprehensive":[66],"taxonomy":[67],"discussing":[69],"each":[70],"method":[71],"category":[72],"detail.":[74],"Additionally,":[75],"examine":[77],"datasets":[79],"commonly":[80],"employed":[81],"research,":[85],"as":[86,88,113],"well":[87],"evaluation":[90,136,182],"metrics":[91],"used":[92,135],"assess":[94],"performance":[96,131],"different":[98,127],"models.":[100],"We":[101,125],"address":[102],"challenges":[104],"faced":[105],"field":[108],"emphasizing":[110],"issues":[111],"such":[112],"object":[114],"hallucination,":[115],"missing":[116],"context,":[117],"illumination":[118],"conditions,":[119],"contextual":[120],"understanding,":[121],"referring":[123],"expressions.":[124],"rank":[126],"methods\u2019":[130],"according":[132],"widely":[134],"metrics,":[137],"giving":[138],"insight":[139],"into":[140],"current":[142],"state-of-the-art.":[143],"Furthermore,":[144],"identify":[146],"several":[147],"potential":[148],"future":[149],"directions":[150],"area,":[155],"which":[156],"include":[157],"tackling":[158],"information":[160],"misalignment":[161],"problem":[162],"between":[163],"text":[166],"modalities,":[167],"mitigating":[168],"dataset":[169],"bias,":[170],"incorporating":[171],"enhance":[176],"caption":[177],"generation,":[178],"developing":[180],"tools":[183],"accurately":[185],"measure":[186],"quality":[188],"captions.":[191]},"counts_by_year":[{"year":2026,"cited_by_count":20},{"year":2025,"cited_by_count":66},{"year":2024,"cited_by_count":53},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
