{"id":"https://openalex.org/W3038593179","doi":"https://doi.org/10.1145/3394955","title":"Image Captioning with a Joint Attention Mechanism by Visual Concept Samples","display_name":"Image Captioning with a Joint Attention Mechanism by Visual Concept Samples","publication_year":2020,"publication_date":"2020-07-05","ids":{"openalex":"https://openalex.org/W3038593179","doi":"https://doi.org/10.1145/3394955","mag":"3038593179"},"language":"en","primary_location":{"id":"doi:10.1145/3394955","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3394955","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100603557","display_name":"Jin Yuan","orcid":"https://orcid.org/0000-0002-9600-7789"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Yuan","raw_affiliation_strings":["Hunan University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hunan University, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100746459","display_name":"Lei Zhang","orcid":"https://orcid.org/0000-0001-7440-3329"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Zhang","raw_affiliation_strings":["Hunan University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hunan University, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029912209","display_name":"Songrui Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Songrui Guo","raw_affiliation_strings":["Hunan University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hunan University, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100730001","display_name":"Yi Xiao","orcid":"https://orcid.org/0000-0002-4911-0353"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Xiao","raw_affiliation_strings":["Hunan University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hunan University, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100396723","display_name":"Zhiyong Li","orcid":"https://orcid.org/0000-0001-9720-5915"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Li","raw_affiliation_strings":["Hunan University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hunan University, China","institution_ids":["https://openalex.org/I16609230"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5662,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.85528711,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"16","issue":"3","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9845576286315918},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.803536593914032},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.6433255672454834},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6148543357849121},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.591183066368103},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.552832305431366},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5471656322479248},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5382461547851562},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.5315805673599243},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46284219622612},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4446999430656433},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3586682677268982},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3324124217033386},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.07702064514160156},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.061724185943603516}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9845576286315918},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.803536593914032},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.6433255672454834},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6148543357849121},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.591183066368103},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.552832305431366},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5471656322479248},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5382461547851562},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.5315805673599243},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46284219622612},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4446999430656433},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3586682677268982},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3324124217033386},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.07702064514160156},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.061724185943603516},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C126322002","wikidata":"https://www.wikidata.org/wiki/Q11180","display_name":"Internal medicine","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3394955","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3394955","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7909899724","display_name":null,"funder_award_id":"61502157, 61502158, and 61502137","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W1969616664","https://openalex.org/W2024082504","https://openalex.org/W2117539524","https://openalex.org/W2173180041","https://openalex.org/W2185175083","https://openalex.org/W2186222003","https://openalex.org/W2194775991","https://openalex.org/W2277195237","https://openalex.org/W2302086703","https://openalex.org/W2550553598","https://openalex.org/W2552161745","https://openalex.org/W2560645892","https://openalex.org/W2575842049","https://openalex.org/W2596164567","https://openalex.org/W2614606538","https://openalex.org/W2745461083","https://openalex.org/W2754689878","https://openalex.org/W2795151422","https://openalex.org/W2803620531","https://openalex.org/W2807718466","https://openalex.org/W2808071176","https://openalex.org/W2808138519","https://openalex.org/W2808663243","https://openalex.org/W2887272576","https://openalex.org/W2887585070","https://openalex.org/W2887712318","https://openalex.org/W2890531016","https://openalex.org/W2890718122","https://openalex.org/W2904551248","https://openalex.org/W2905288264","https://openalex.org/W2906314281","https://openalex.org/W2908356592","https://openalex.org/W2913618459","https://openalex.org/W2962958773","https://openalex.org/W2962982762","https://openalex.org/W2963088515","https://openalex.org/W2963101956","https://openalex.org/W2963138277","https://openalex.org/W2963170456","https://openalex.org/W2963175879","https://openalex.org/W2963992143","https://openalex.org/W2965846473","https://openalex.org/W2979747405","https://openalex.org/W2983141445","https://openalex.org/W2988823324","https://openalex.org/W3035323998","https://openalex.org/W3105136412","https://openalex.org/W4236965008"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W3217195652","https://openalex.org/W2389087714"],"abstract_inverted_index":{"The":[0],"attention":[1,35,84,103],"mechanism":[2,36,85],"has":[3],"been":[4,212],"established":[5],"as":[6],"an":[7,23,64,246],"effective":[8],"method":[9],"for":[10,52,94,174,199,206,249],"generating":[11],"caption":[12,29],"words":[13],"in":[14,22,71,97,124,158,196,252],"image":[15,24,98,200,216,253],"captioning;":[16],"it":[17,169],"explores":[18],"one":[19,107,111],"noticed":[20,112],"subregion":[21,113],"to":[25,41,62,90,109,126,147,192,228],"predict":[26,49],"a":[27,43,82,148,164],"related":[28],"word.":[30],"However,":[31],"even":[32],"though":[33],"the":[34,45,58,68,136,160,171,204],"could":[37,138,237],"offer":[38],"accurate":[39],"subregions":[40,130,145],"train":[42],"model,":[44],"learned":[46],"captioner":[47],"may":[48],"wrong,":[50],"especially":[51],"visual":[53,87,95,132,165,175,181,194,250],"concept":[54,88,133,182],"words,":[55],"which":[56,80,202,244],"are":[57,224],"most":[59],"important":[60],"parts":[61],"understand":[63],"image.":[65],"To":[66],"tackle":[67],"preceding":[69],"problem,":[70],"this":[72,153],"article":[73],"we":[74],"propose":[75],"Visual":[76,116],"Concept":[77,117],"Enhanced":[78,118],"Captioner,":[79],"employs":[81],"joint":[83,154],"with":[86],"samples":[89,183],"strengthen":[91],"prediction":[92,172],"abilities":[93],"concepts":[96,251],"captioning.":[99,254],"Different":[100],"from":[101,131,184],"traditional":[102],"approaches":[104],"that":[105,234],"adopt":[106],"LSTM":[108],"explore":[110],"each":[114],"time,":[115],"Captioner":[119],"introduces":[120],"multiple":[121,129],"virtual":[122],"LSTMs":[123],"parallel":[125],"simultaneously":[127],"receive":[128],"samples.":[134],"Then,":[135],"model":[137,188],"update":[139],"parameters":[140],"by":[141,178],"jointly":[142],"exploring":[143],"these":[144],"according":[146],"composite":[149],"loss":[150],"function.":[151],"Technically,":[152],"learning":[155,198],"is":[156,232],"helpful":[157],"finding":[159],"common":[161],"characters":[162],"of":[163],"concept,":[166],"and":[167,219,221,241],"thus":[168],"enhances":[170],"accuracy":[173,247],"concepts.":[176],"Moreover,":[177],"integrating":[179],"diverse":[180],"different":[185],"domains,":[186],"our":[187,235],"can":[189],"be":[190],"extended":[191],"bridge":[193],"bias":[195],"cross-domain":[197],"captioning,":[201],"saves":[203],"cost":[205],"labeling":[207],"captions.":[208],"Extensive":[209],"experiments":[210],"have":[211],"conducted":[213],"on":[214],"two":[215],"datasets":[217],"(MSCOCO":[218],"Flickr30K),":[220],"superior":[222],"results":[223],"reported":[225],"when":[226],"comparing":[227],"state-of-the-art":[229],"approaches.":[230],"It":[231],"impressive":[233],"approach":[236],"significantly":[238],"increase":[239],"BLUE-1":[240],"F1":[242],"scores,":[243],"demonstrates":[245],"improvement":[248]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
