{"id":"https://openalex.org/W3129698646","doi":"https://doi.org/10.1109/access.2021.3058425","title":"VSAM-Based Visual Keyword Generation for Image Caption","display_name":"VSAM-Based Visual Keyword Generation for Image Caption","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3129698646","doi":"https://doi.org/10.1109/access.2021.3058425","mag":"3129698646"},"language":"en","primary_location":{"id":"doi:10.1109/access.2021.3058425","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3058425","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09351979.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09351979.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053789988","display_name":"Suya Zhang","orcid":"https://orcid.org/0000-0002-1038-2618"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Suya Zhang","raw_affiliation_strings":["State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1038-2618","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103216248","display_name":"Yana Zhang","orcid":"https://orcid.org/0000-0002-9287-3639"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yana Zhang","raw_affiliation_strings":["State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9287-3639","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053686266","display_name":"Zeyu Chen","orcid":"https://orcid.org/0000-0003-2766-2031"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeyu Chen","raw_affiliation_strings":["State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-2766-2031","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100376709","display_name":"Zhaohui Li","orcid":"https://orcid.org/0000-0002-6076-8367"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaohui Li","raw_affiliation_strings":["State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5053789988"],"corresponding_institution_ids":["https://openalex.org/I75689368"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.3879,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.58922836,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"9","issue":null,"first_page":"27638","last_page":"27649"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8717601895332336},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.6508327722549438},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6242423057556152},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5686619281768799},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.5549613833427429},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5189963579177856},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5011453628540039},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.4693411588668823},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4197882413864136},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.41720354557037354},{"id":"https://openalex.org/keywords/visual-word","display_name":"Visual Word","score":0.4147062599658966},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3776276409626007},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3369966447353363},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.3189041018486023},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.28233277797698975}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8717601895332336},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.6508327722549438},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6242423057556152},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5686619281768799},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.5549613833427429},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5189963579177856},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5011453628540039},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.4693411588668823},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4197882413864136},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.41720354557037354},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.4147062599658966},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3776276409626007},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3369966447353363},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.3189041018486023},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.28233277797698975},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2021.3058425","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3058425","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09351979.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:d4157ddbcdab4e89bb2763f8104090e9","is_oa":true,"landing_page_url":"https://doaj.org/article/d4157ddbcdab4e89bb2763f8104090e9","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 9, Pp 27638-27649 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2021.3058425","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3058425","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09351979.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6299999952316284}],"awards":[],"funders":[{"id":"https://openalex.org/F4320329139","display_name":"Communication University of China","ror":"https://ror.org/04facbs33"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3129698646.pdf","grobid_xml":"https://content.openalex.org/works/W3129698646.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W8316075","https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1687846465","https://openalex.org/W1858383477","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1897761818","https://openalex.org/W1931639407","https://openalex.org/W1969616664","https://openalex.org/W1999567494","https://openalex.org/W2019370496","https://openalex.org/W2081580037","https://openalex.org/W2123442489","https://openalex.org/W2143449221","https://openalex.org/W2250379249","https://openalex.org/W2250539671","https://openalex.org/W2296385829","https://openalex.org/W2550553598","https://openalex.org/W2575842049","https://openalex.org/W2604729005","https://openalex.org/W2745461083","https://openalex.org/W2795151422","https://openalex.org/W2808138519","https://openalex.org/W2903617461","https://openalex.org/W2963084599","https://openalex.org/W2963088515","https://openalex.org/W2964049455","https://openalex.org/W2964121744","https://openalex.org/W2983141445","https://openalex.org/W2984138079","https://openalex.org/W2986670728","https://openalex.org/W3035160838","https://openalex.org/W3035284526","https://openalex.org/W6600334730","https://openalex.org/W6630875275","https://openalex.org/W6631190155","https://openalex.org/W6637306801","https://openalex.org/W6639118148","https://openalex.org/W6681184217","https://openalex.org/W6729046916","https://openalex.org/W6736419893"],"related_works":["https://openalex.org/W2002918846","https://openalex.org/W2140369944","https://openalex.org/W2370180225","https://openalex.org/W2186394444","https://openalex.org/W2735794310","https://openalex.org/W2071180033","https://openalex.org/W2120663665","https://openalex.org/W2083396186","https://openalex.org/W2373526234","https://openalex.org/W2605741187"],"abstract_inverted_index":{"Image":[0,185],"caption":[1,71],"is":[2,11,50,61,196],"to":[3,13,131,146,198,233],"understand":[4],"and":[5,47,70,90,138,145,150,167],"describe":[6],"the":[7,38,41,48,56,59,64,83,88,91,97,106,113,116,121,126,133,142,179,204,208,223,241,250],"visual":[8,44,148,159,183,200,210,235,244],"content,":[9],"which":[10],"expected":[12],"be":[14],"applied":[15],"in":[16,20,31,103,112,141,228],"automatic":[17],"news":[18],"reporting":[19],"future.":[21],"In":[22,55,206],"recent":[23],"years,":[24],"there":[25],"has":[26],"been":[27],"an":[28,32,172,215,229],"increasing":[29],"interest":[30],"Encoder-Decoder":[33,57,143],"framework":[34,58],"for":[35,43,52,77,202],"image":[36,67,129,136,173],"caption:":[37],"encoder":[39],"takes":[40],"responsibility":[42],"semantic":[45,151],"comprehension":[46],"decoder":[49],"designed":[51],"sentence":[53,139],"generation.":[54],"translation":[60],"based":[62],"on":[63,220],"correspondence":[65],"between":[66,135,165],"feature":[68],"vectors":[69],"vectors.":[72],"Attention":[73,194],"mechanism":[74],"makes":[75],"sense":[76],"a":[78,156,162,191],"more":[79],"accurate":[80],"correspondence.":[81],"However,":[82],"attention":[84],"model":[85,252],"works":[86],"with":[87,96],"decoder,":[89],"focused":[92],"content":[93],"changes":[94],"dynamically":[95],"generated":[98],"word.":[99],"It":[100],"results":[101],"that":[102,240],"many":[104],"cases":[105],"salient":[107,122],"contents":[108],"are":[109,119,212,226],"not":[110,120],"described":[111,118],"caption,":[114,130],"or":[115],"objects":[117],"ones.":[123],"To":[124],"improve":[125],"precision":[127,242],"of":[128,158,182,243],"bridge":[132],"gap":[134],"understanding":[137],"generation":[140,246],"framework,":[144],"align":[147],"information":[149,152],"better,":[153],"we":[154],"propose":[155],"concept":[157],"keyword":[160,245],"as":[161,178],"gang":[163],"board":[164],"seeing":[166],"saying.":[168],"This":[169],"paper":[170],"presents":[171],"dataset":[174],"derived":[175],"from":[176],"MSCOCO":[177],"first":[180],"collection":[181],"keywords:":[184],"Visual":[186,192],"Keyword":[187],"Dataset":[188],"(IVKD).":[189],"Also,":[190],"Semantic":[193],"Model(VSAM)":[195],"proposed":[197,251],"obtain":[199],"keywords":[201],"generating":[203],"annotation.":[205],"VSAM,":[207],"object-level":[209],"features":[211,225],"extracted":[213],"by":[214,249],"object":[216,224],"detector":[217],"after":[218],"pre-training":[219],"IVKD.":[221],"Then":[222],"fed":[227],"Optimized":[230],"Pointer":[231],"Network(OPN)":[232],"generate":[234],"keywords.":[236],"The":[237],"experiments":[238],"show":[239],"reaches":[247],"91.7%":[248],"VSAM.":[253]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
