{"id":"https://openalex.org/W4388893791","doi":"https://doi.org/10.1109/wincom59760.2023.10322923","title":"Evolution of Image Captioning Models: An Overview","display_name":"Evolution of Image Captioning Models: An Overview","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4388893791","doi":"https://doi.org/10.1109/wincom59760.2023.10322923"},"language":"en","primary_location":{"id":"doi:10.1109/wincom59760.2023.10322923","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wincom59760.2023.10322923","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 10th International Conference on Wireless Networks and Mobile Communications (WINCOM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093111575","display_name":"Abdelkrim Saouabe","orcid":"https://orcid.org/0009-0001-1056-0322"},"institutions":[{"id":"https://openalex.org/I3121676899","display_name":"Universit\u00e9 Ibn-Tofail","ror":"https://ror.org/02wj89n04","country_code":"MA","type":"education","lineage":["https://openalex.org/I3121676899"]}],"countries":["MA"],"is_corresponding":true,"raw_author_name":"Abdelkrim Saouabe","raw_affiliation_strings":["IbnTofail University,Computer Science Research Laboratory, Faculty of Sciences,K&#x00E9;nitra,Morocco"],"affiliations":[{"raw_affiliation_string":"IbnTofail University,Computer Science Research Laboratory, Faculty of Sciences,K&#x00E9;nitra,Morocco","institution_ids":["https://openalex.org/I3121676899"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067645236","display_name":"Said Tkatek","orcid":"https://orcid.org/0000-0002-2800-7138"},"institutions":[{"id":"https://openalex.org/I3121676899","display_name":"Universit\u00e9 Ibn-Tofail","ror":"https://ror.org/02wj89n04","country_code":"MA","type":"education","lineage":["https://openalex.org/I3121676899"]}],"countries":["MA"],"is_corresponding":false,"raw_author_name":"Said Tkatek","raw_affiliation_strings":["IbnTofail University,Computer Science Research Laboratory, Faculty of Sciences,K&#x00E9;nitra,Morocco"],"affiliations":[{"raw_affiliation_string":"IbnTofail University,Computer Science Research Laboratory, Faculty of Sciences,K&#x00E9;nitra,Morocco","institution_ids":["https://openalex.org/I3121676899"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005909413","display_name":"Merouane Mazar","orcid":"https://orcid.org/0000-0002-4276-8654"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Merouane Mazar","raw_affiliation_strings":["AKKODIS,Paris,France","AKKODIS, Paris, France"],"affiliations":[{"raw_affiliation_string":"AKKODIS,Paris,France","institution_ids":[]},{"raw_affiliation_string":"AKKODIS, Paris, France","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5093111596","display_name":"Imad Mourtaji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Imad Mourtaji","raw_affiliation_strings":["AKKODIS,Paris,France","AKKODIS, Paris, France"],"affiliations":[{"raw_affiliation_string":"AKKODIS,Paris,France","institution_ids":[]},{"raw_affiliation_string":"AKKODIS, Paris, France","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5093111575"],"corresponding_institution_ids":["https://openalex.org/I3121676899"],"apc_list":null,"apc_paid":null,"fwci":0.4919,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.66367986,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.965656042098999},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6864599585533142},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5757594704627991},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47244933247566223},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3999876379966736},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33134520053863525}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.965656042098999},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6864599585533142},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5757594704627991},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47244933247566223},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3999876379966736},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33134520053863525}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wincom59760.2023.10322923","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wincom59760.2023.10322923","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 10th International Conference on Wireless Networks and Mobile Communications (WINCOM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4300000071525574,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1527575280","https://openalex.org/W2803259101","https://openalex.org/W2803525166","https://openalex.org/W2896457183","https://openalex.org/W2963758027","https://openalex.org/W2963834202","https://openalex.org/W2977928982","https://openalex.org/W2983141445","https://openalex.org/W2988916019","https://openalex.org/W2997443031","https://openalex.org/W3035323998","https://openalex.org/W3091588028","https://openalex.org/W3101313921","https://openalex.org/W3108170342","https://openalex.org/W3110019360","https://openalex.org/W3123936469","https://openalex.org/W3174377922","https://openalex.org/W3174476431","https://openalex.org/W3174930619","https://openalex.org/W3193402170","https://openalex.org/W3214200059","https://openalex.org/W4220746441","https://openalex.org/W4220968371","https://openalex.org/W4288329833","https://openalex.org/W4366721356","https://openalex.org/W4376638872","https://openalex.org/W4379113548","https://openalex.org/W4385245566","https://openalex.org/W6631516269","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6763643401","https://openalex.org/W6789019991","https://openalex.org/W6791353385","https://openalex.org/W6800139874"],"related_works":["https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3217388757","https://openalex.org/W3122720459","https://openalex.org/W4298897568","https://openalex.org/W4289422896","https://openalex.org/W1938708284","https://openalex.org/W4380190185","https://openalex.org/W3204019825"],"abstract_inverted_index":{"This":[0],"article":[1,41,73,103,136],"presents":[2],"a":[3],"state-of-the-art":[4],"review":[5],"of":[6,27,37,112,131,140],"image":[7,44,70,80,132],"captioning":[8,45,84],"methodologies":[9],"developed":[10],"in":[11,34,67],"the":[12,24,35,102,110,121,129,135],"past":[13],"five":[14],"years.":[15],"Image":[16],"captioning,":[17],"which":[18],"aims":[19],"to":[20,55,64,79],"generate":[21],"text":[22,99],"describing":[23],"visual":[25,93],"content":[26],"an":[28],"image,":[29],"has":[30],"gained":[31],"increasing":[32],"interest":[33],"field":[36],"artificial":[38],"intelligence.":[39],"The":[40,72],"analyzes":[42],"how":[43],"methods":[46,58],"have":[47,60,127],"evolved,":[48],"transitioning":[49],"from":[50],"traditional":[51],"machine":[52],"learning-based":[53,57],"approaches":[54,78,90],"deep":[56],"that":[59,126],"become":[61],"dominant":[62],"due":[63],"their":[65],"effectiveness":[66],"efficiently":[68],"extracting":[69],"features.":[71],"also":[74],"explores":[75],"two":[76],"main":[77],"captioning:":[81],"dense":[82],"(region-based)":[83],"and":[85,95,116,158],"whole-scene":[86],"captioning.":[87,133],"It":[88],"discusses":[89],"based":[91],"on":[92],"space":[94,97],"multimodal":[96],"for":[98,161],"generation.":[100],"Furthermore,":[101],"examines":[104],"reinforcement":[105],"learning":[106],"methods,":[107],"semantic":[108],"enhancements,":[109],"use":[111],"self-attention":[113],"transformer":[114],"models,":[115],"pretrained":[117],"models":[118],"such":[119],"as":[120],"Generative":[122],"Pre-trained":[123],"Transformer":[124],"(GPT)":[125],"improved":[128],"performance":[130],"Finally,":[134],"highlights":[137],"various":[138],"applications":[139],"this":[141],"technique,":[142],"including":[143],"human-machine":[144],"interaction,":[145],"biomedicine,":[146],"automatic":[147],"medical":[148],"prescription,":[149],"children\u2019s":[150],"education,":[151],"industrial":[152],"quality":[153],"control,":[154],"traffic":[155],"data":[156],"analysis,":[157],"assistive":[159],"technologies":[160],"visually":[162],"impaired":[163],"individuals.":[164]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
