{"id":"https://openalex.org/W2767361967","doi":"https://doi.org/10.1109/msp.2017.2741510","title":"Deep Learning for Image-to-Text Generation: A Technical Overview","display_name":"Deep Learning for Image-to-Text Generation: A Technical Overview","publication_year":2017,"publication_date":"2017-11-01","ids":{"openalex":"https://openalex.org/W2767361967","doi":"https://doi.org/10.1109/msp.2017.2741510","mag":"2767361967"},"language":"en","primary_location":{"id":"doi:10.1109/msp.2017.2741510","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msp.2017.2741510","pdf_url":null,"source":{"id":"https://openalex.org/S120977877","display_name":"IEEE Signal Processing Magazine","issn_l":"1053-5888","issn":["1053-5888","1558-0792"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Magazine","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101727205","display_name":"Xiaodong He","orcid":"https://orcid.org/0000-0002-9463-9168"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiaodong He","raw_affiliation_strings":["Deep Learning Group of Microsoft Research, Redmond, Washington","Department of Electrical Engineering and Computer Engineering, University of Washington, Seattle"],"affiliations":[{"raw_affiliation_string":"Deep Learning Group of Microsoft Research, Redmond, Washington","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Engineering, University of Washington, Seattle","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100671324","display_name":"Li Deng","orcid":"https://orcid.org/0000-0002-1014-0790"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Li Deng","raw_affiliation_strings":["Microsoft Research, Redmond, Washington"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, Washington","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101727205"],"corresponding_institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I201448701"],"apc_list":null,"apc_paid":null,"fwci":3.1448,"has_fulltext":false,"cited_by_count":117,"citation_normalized_percentile":{"value":0.95309197,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"34","issue":"6","first_page":"109","last_page":"116"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9640217423439026},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7530171275138855},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5383895635604858},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5237928628921509},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4889739751815796},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4865321218967438},{"id":"https://openalex.org/keywords/intersection","display_name":"Intersection (aeronautics)","score":0.45298418402671814},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4152366518974304},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.4117092490196228},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.37123793363571167},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3671395480632782},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.1658087968826294},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09540468454360962}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9640217423439026},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7530171275138855},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5383895635604858},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5237928628921509},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4889739751815796},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4865321218967438},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.45298418402671814},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4152366518974304},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.4117092490196228},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.37123793363571167},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3671395480632782},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.1658087968826294},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09540468454360962},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/msp.2017.2741510","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msp.2017.2741510","pdf_url":null,"source":{"id":"https://openalex.org/S120977877","display_name":"IEEE Signal Processing Magazine","issn_l":"1053-5888","issn":["1053-5888","1558-0792"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Magazine","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.47999998927116394,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":96,"referenced_works":["https://openalex.org/W68733909","https://openalex.org/W1514535095","https://openalex.org/W1536680647","https://openalex.org/W1573040851","https://openalex.org/W1686810756","https://openalex.org/W1687846465","https://openalex.org/W1811254738","https://openalex.org/W1847088711","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1895989618","https://openalex.org/W1897761818","https://openalex.org/W1905882502","https://openalex.org/W1931639407","https://openalex.org/W1933349210","https://openalex.org/W1947481528","https://openalex.org/W1956340063","https://openalex.org/W1969616664","https://openalex.org/W2064675550","https://openalex.org/W2066134726","https://openalex.org/W2101105183","https://openalex.org/W2105103432","https://openalex.org/W2108598243","https://openalex.org/W2109586012","https://openalex.org/W2119775030","https://openalex.org/W2123301721","https://openalex.org/W2128856065","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2139501017","https://openalex.org/W2147768505","https://openalex.org/W2150341604","https://openalex.org/W2160815625","https://openalex.org/W2166010828","https://openalex.org/W2171361956","https://openalex.org/W2184045248","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2220981600","https://openalex.org/W2277195237","https://openalex.org/W2302086703","https://openalex.org/W2405756170","https://openalex.org/W2491664569","https://openalex.org/W2506483933","https://openalex.org/W2510185399","https://openalex.org/W2527569769","https://openalex.org/W2558834163","https://openalex.org/W2563399268","https://openalex.org/W2588822708","https://openalex.org/W2607151106","https://openalex.org/W2616969219","https://openalex.org/W2618530766","https://openalex.org/W2737766105","https://openalex.org/W2768661419","https://openalex.org/W2951183276","https://openalex.org/W2951326654","https://openalex.org/W2951824008","https://openalex.org/W2952072685","https://openalex.org/W2962706528","https://openalex.org/W2963084599","https://openalex.org/W2963149042","https://openalex.org/W2963576560","https://openalex.org/W2963630207","https://openalex.org/W2963758027","https://openalex.org/W2963954913","https://openalex.org/W2964018924","https://openalex.org/W2964024144","https://openalex.org/W2964049455","https://openalex.org/W2964138343","https://openalex.org/W2964241990","https://openalex.org/W2964268978","https://openalex.org/W2964308564","https://openalex.org/W3087871082","https://openalex.org/W4239072543","https://openalex.org/W4249013746","https://openalex.org/W6630875275","https://openalex.org/W6637306801","https://openalex.org/W6637373629","https://openalex.org/W6638742206","https://openalex.org/W6638824847","https://openalex.org/W6639102338","https://openalex.org/W6675392924","https://openalex.org/W6676297131","https://openalex.org/W6676497082","https://openalex.org/W6677994088","https://openalex.org/W6678262379","https://openalex.org/W6679434410","https://openalex.org/W6679436768","https://openalex.org/W6684369376","https://openalex.org/W6685230081","https://openalex.org/W6685670348","https://openalex.org/W6713645886","https://openalex.org/W6727654133","https://openalex.org/W6727862155","https://openalex.org/W6729046916","https://openalex.org/W6730746255"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W3009270862"],"abstract_inverted_index":{"Generating":[0],"a":[1],"natural":[2,19],"language":[3,20],"description":[4],"from":[5],"an":[6,9],"image":[7,32],"is":[8],"emerging":[10,103],"interdisciplinary":[11],"problem":[12],"at":[13],"the":[14,37,74,80,111,115,118],"intersection":[15],"of":[16,40],"computer":[17],"vision,":[18],"processing,":[21],"and":[22,55,61,114,127,130],"artificial":[23],"intelligence":[24,50],"(AI).":[25],"This":[26],"task,":[27],"often":[28],"referred":[29],"to":[30,67,73],"as":[31,45],"or":[33],"visual":[34,47,49,70,89,104],"captioning,":[35],"forms":[36],"technical":[38],"foundation":[39],"many":[41],"important":[42],"applications,":[43],"such":[44],"semantic":[46],"search,":[48],"in":[51,58,77,88,91,124,134],"chatting":[52],"robots,":[53],"photo":[54],"video":[56],"sharing":[57],"social":[59],"media,":[60],"aid":[62],"for":[63],"visually":[64],"impaired":[65],"people":[66],"perceive":[68],"surrounding":[69],"content.":[71],"Thanks":[72],"recent":[75,92],"advances":[76],"deep":[78],"learning,":[79],"AI":[81],"research":[82,126],"community":[83,119],"has":[84,120],"witnessed":[85],"tremendous":[86],"progress":[87,117],"captioning":[90,105],"years.":[93],"In":[94],"this":[95,101],"article,":[96],"we":[97],"will":[98,108],"first":[99],"summarize":[100],"exciting":[102],"area.":[106],"We":[107],"then":[109],"analyze":[110],"key":[112],"development":[113],"major":[116],"made,":[121],"their":[122],"impact":[123],"both":[125],"industry":[128],"deployment,":[129],"what":[131],"lies":[132],"ahead":[133],"future":[135],"breakthroughs.":[136]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":21},{"year":2023,"cited_by_count":23},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":18},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":3}],"updated_date":"2026-03-31T07:56:22.981413","created_date":"2025-10-10T00:00:00"}
