{"id":"https://openalex.org/W2911285743","doi":"https://doi.org/10.1145/3292058","title":"Image Captioning With Visual-Semantic Double Attention","display_name":"Image Captioning With Visual-Semantic Double Attention","publication_year":2019,"publication_date":"2019-01-23","ids":{"openalex":"https://openalex.org/W2911285743","doi":"https://doi.org/10.1145/3292058","mag":"2911285743"},"language":"en","primary_location":{"id":"doi:10.1145/3292058","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292058","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041482841","display_name":"Chen He","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chen He","raw_affiliation_strings":["School of Electronics and Information Technology, Sun Yat-Sen University, Guangdong, People's Republic of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronics and Information Technology, Sun Yat-Sen University, Guangdong, People's Republic of China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056953478","display_name":"Haifeng Hu","orcid":"https://orcid.org/0000-0002-4884-323X"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haifeng Hu","raw_affiliation_strings":["School of Electronics and Information Technology, Sun Yat-Sen University, Guangdong, People's Republic of China"],"raw_orcid":"https://orcid.org/0000-0002-4884-323X","affiliations":[{"raw_affiliation_string":"School of Electronics and Information Technology, Sun Yat-Sen University, Guangdong, People's Republic of China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5041482841"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":1.4296,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.85186645,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"15","issue":"1","first_page":"1","last_page":"16"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8788741230964661},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8321006298065186},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6706520915031433},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6423665285110474},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.63511061668396},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.565592348575592},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.45325028896331787},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.4243980050086975},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4063262939453125},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.096686452627182}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8788741230964661},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8321006298065186},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6706520915031433},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6423665285110474},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.63511061668396},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.565592348575592},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.45325028896331787},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.4243980050086975},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4063262939453125},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.096686452627182},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3292058","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292058","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8700000047683716,"display_name":"Life below water","id":"https://metadata.un.org/sdg/14"}],"awards":[{"id":"https://openalex.org/G1786224844","display_name":null,"funder_award_id":"61673402, 61273270, and 60802069","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8670469836","display_name":null,"funder_award_id":"2017A030311029, 2016B010123005 and 2017B090909005","funder_id":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321921","display_name":"Natural Science Foundation of Guangdong Province","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W8316075","https://openalex.org/W1514535095","https://openalex.org/W1516184288","https://openalex.org/W1522301498","https://openalex.org/W1686810756","https://openalex.org/W1780856595","https://openalex.org/W1811254738","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1897761818","https://openalex.org/W1905882502","https://openalex.org/W1931639407","https://openalex.org/W1956340063","https://openalex.org/W1969616664","https://openalex.org/W2101105183","https://openalex.org/W2105103432","https://openalex.org/W2123301721","https://openalex.org/W2139380585","https://openalex.org/W2143449221","https://openalex.org/W2154652894","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2293453011","https://openalex.org/W2302086703","https://openalex.org/W2463565445","https://openalex.org/W2552161745","https://openalex.org/W2575842049","https://openalex.org/W2600463316","https://openalex.org/W2604178507","https://openalex.org/W2624317482","https://openalex.org/W2740118378","https://openalex.org/W2788237871","https://openalex.org/W2949376505","https://openalex.org/W2963084599","https://openalex.org/W2963954913","https://openalex.org/W3103022576"],"related_works":["https://openalex.org/W3217195652","https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3122720459","https://openalex.org/W4298897568","https://openalex.org/W1938708284","https://openalex.org/W4380190185","https://openalex.org/W4387608643","https://openalex.org/W4210657415"],"abstract_inverted_index":{"In":[0,15,75,92],"this":[1],"article,":[2],"we":[3],"propose":[4],"a":[5,23,36],"novel":[6],"Visual-Semantic":[7],"Double":[8],"Attention":[9,39],"(VSDA)":[10],"model":[11,27,41,81,153],"for":[12],"image":[13,33],"captioning.":[14],"our":[16,80,127,152],"approach,":[17],"VSDA":[18,99,183],"consists":[19],"of":[20,56,63,98,104,115,157,163],"two":[21],"parts:":[22],"modified":[24],"visual":[25,133,143,164],"attention":[26,134],"is":[28,42],"used":[29],"to":[30,44,119,149,159],"extract":[31],"sub-region":[32],"features,":[34],"then":[35],"new":[37],"SEmantic":[38],"(SEA)":[40],"proposed":[43],"distill":[45],"semantic":[46,73,108,122,145],"features.":[47,74],"Traditional":[48],"attribute-based":[49],"models":[50,135],"always":[51],"neglect":[52],"the":[53,83,95,102,113,121,130,161],"distinctive":[54],"importance":[55],"each":[57,78,150],"attribute":[58,117],"word":[59,86],"and":[60,144,165,177,187],"fuse":[61],"all":[62],"them":[64,158],"into":[65],"recurrent":[66],"neural":[67],"networks,":[68],"resulting":[69],"in":[70,101],"abundant":[71],"irrelevant":[72,116],"contrast,":[76],"at":[77],"timestep,":[79],"selects":[82],"most":[84],"relevant":[85],"that":[87,132,142,182],"aligns":[88],"with":[89],"current":[90],"context.":[91],"other":[93,185],"words,":[94],"real":[96],"power":[97],"lies":[100],"ability":[103],"not":[105],"only":[106],"leveraging":[107],"features":[109,146],"but":[110],"also":[111],"eliminating":[112],"influence":[114],"words":[118],"make":[120],"guidance":[123],"more":[124],"precise.":[125],"Furthermore,":[126],"approach":[128],"solves":[129],"problem":[131],"cannot":[136],"boost":[137],"generating":[138],"non-visual":[139,166],"words.":[140,167],"Considering":[141],"are":[147,170],"complementary":[148],"other,":[151],"can":[154],"leverage":[155],"both":[156],"strengthen":[160],"generations":[162],"Extensive":[168],"experiments":[169],"conducted":[171],"on":[172],"famous":[173],"datasets:":[174],"MS":[175],"COCO":[176],"Flickr30k.":[178],"The":[179],"results":[180],"show":[181],"outperforms":[184],"methods":[186],"achieves":[188],"promising":[189],"performance.":[190]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":5}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
