{"id":"https://openalex.org/W3211865849","doi":"https://doi.org/10.1145/3460474","title":"Bi-Directional Co-Attention Network for Image Captioning","display_name":"Bi-Directional Co-Attention Network for Image Captioning","publication_year":2021,"publication_date":"2021-11-12","ids":{"openalex":"https://openalex.org/W3211865849","doi":"https://doi.org/10.1145/3460474","mag":"3211865849"},"language":"en","primary_location":{"id":"doi:10.1145/3460474","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3460474","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042991224","display_name":"Weitao Jiang","orcid":"https://orcid.org/0000-0002-7168-9357"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weitao Jiang","raw_affiliation_strings":["School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People\u2019s Republic of China","School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People's Republic of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People\u2019s Republic of China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People's Republic of China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101609885","display_name":"Weixuan Wang","orcid":"https://orcid.org/0000-0003-4551-0795"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weixuan Wang","raw_affiliation_strings":["School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People\u2019s Republic of China","School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People's Republic of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People\u2019s Republic of China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People's Republic of China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056953478","display_name":"Haifeng Hu","orcid":"https://orcid.org/0000-0002-4884-323X"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haifeng Hu","raw_affiliation_strings":["School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People\u2019s Republic of China","School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People's Republic of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People\u2019s Republic of China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School of Electronics and Information Technology, Sun Yat-sen University, Guangdong, People's Republic of China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5042991224"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":3.2973,"has_fulltext":false,"cited_by_count":41,"citation_normalized_percentile":{"value":0.93681407,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"17","issue":"4","first_page":"1","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8708402514457703},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8378285765647888},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7057639956474304},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5772356390953064},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.5388493537902832},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5346319675445557},{"id":"https://openalex.org/keywords/concatenation","display_name":"Concatenation (mathematics)","score":0.5144434571266174},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4990379810333252},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.45746761560440063},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.4504649043083191},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4161926805973053},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3804815411567688},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10465890169143677}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8708402514457703},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8378285765647888},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7057639956474304},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5772356390953064},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.5388493537902832},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5346319675445557},{"id":"https://openalex.org/C87619178","wikidata":"https://www.wikidata.org/wiki/Q126002","display_name":"Concatenation (mathematics)","level":2,"score":0.5144434571266174},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4990379810333252},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45746761560440063},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.4504649043083191},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4161926805973053},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3804815411567688},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10465890169143677},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3460474","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3460474","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7300000190734863,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G4228998522","display_name":null,"funder_award_id":"2017A030311029","funder_id":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province"},{"id":"https://openalex.org/G5975937547","display_name":null,"funder_award_id":"62076262, 61673402, 61273270, and 60802069","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321921","display_name":"Natural Science Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W1811254738","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W1969616664","https://openalex.org/W2064675550","https://openalex.org/W2101105183","https://openalex.org/W2108598243","https://openalex.org/W2176263492","https://openalex.org/W2194775991","https://openalex.org/W2277195237","https://openalex.org/W2302086703","https://openalex.org/W2506483933","https://openalex.org/W2550553598","https://openalex.org/W2558834163","https://openalex.org/W2564898401","https://openalex.org/W2575842049","https://openalex.org/W2600463316","https://openalex.org/W2607768201","https://openalex.org/W2729842244","https://openalex.org/W2745461083","https://openalex.org/W2795151422","https://openalex.org/W2885013662","https://openalex.org/W2890531016","https://openalex.org/W2904551248","https://openalex.org/W2911285743","https://openalex.org/W2949197413","https://openalex.org/W2949376505","https://openalex.org/W2963084599","https://openalex.org/W2963717374","https://openalex.org/W2964018924","https://openalex.org/W2983141445","https://openalex.org/W2984138079","https://openalex.org/W2986670728","https://openalex.org/W2990818246","https://openalex.org/W3034655362","https://openalex.org/W3035284526","https://openalex.org/W3082436432","https://openalex.org/W3096691356","https://openalex.org/W3103022576","https://openalex.org/W3107848485","https://openalex.org/W3132366366","https://openalex.org/W6728881024","https://openalex.org/W6741068176"],"related_works":["https://openalex.org/W2729514902","https://openalex.org/W2024160000","https://openalex.org/W2773500201","https://openalex.org/W2061273563","https://openalex.org/W2285052147","https://openalex.org/W4287995534","https://openalex.org/W2743258233","https://openalex.org/W2998168123","https://openalex.org/W1972656095","https://openalex.org/W2970216048"],"abstract_inverted_index":{"Image":[0],"Captioning,":[1],"which":[2,53,179],"automatically":[3],"describes":[4],"an":[5,125],"image":[6,28],"with":[7,85],"natural":[8],"language,":[9],"is":[10,200],"regarded":[11],"as":[12,47],"a":[13,65,92,106,158,169,227],"fundamental":[14],"challenge":[15],"in":[16,27],"computer":[17],"vision.":[18],"In":[19,60,192],"recent":[20],"years,":[21],"significant":[22],"advance":[23],"has":[24],"been":[25],"made":[26],"captioning":[29],"through":[30],"improving":[31],"attention":[32,39,116,152],"mechanism.":[33],"However,":[34],"most":[35,119],"existing":[36,120,196],"methods":[37,121],"construct":[38],"mechanisms":[40],"based":[41],"on":[42,101,231],"singular":[43],"visual":[44,73,98,208],"features,":[45,52],"such":[46],"patch":[48],"features":[49,74,82,163,174,209],"or":[50,133],"object":[51],"limits":[54],"the":[55,144,165,176,182,185,189,195,198,211,220,239,243,247],"accuracy":[56],"of":[57,164,175,184,229],"generated":[58],"captions.":[59],"this":[61,140],"article,":[62],"we":[63,103,142,155],"propose":[64,105,157],"Bidirectional":[66],"Co-Attention":[67],"Network":[68],"(BCAN)":[69],"that":[70,242],"combines":[71],"multiple":[72,97,207],"to":[75,113,135,149,161,172,194,202],"provide":[76],"information":[77,205,218],"from":[78,206],"different":[79,87,177],"aspects.":[80],"Different":[81],"are":[83,91],"associated":[84],"predicting":[86],"words,":[88],"and":[89,108,168,188,215,236,238],"there":[90],"priori":[93],"relations":[94],"between":[95],"these":[96],"features.":[99,137,153],"Based":[100],"this,":[102],"further":[104,156],"bottom-up":[107,186],"top-down":[109,190],"bi-directional":[110,212],"co-attention":[111,187,213],"mechanism":[112],"extract":[114],"discriminative":[115],"information.":[117],"Furthermore,":[118],"do":[122],"not":[123],"exploit":[124],"effective":[126],"multimodal":[127,151,217],"integration":[128],"strategy,":[129,214],"generally":[130],"using":[131],"addition":[132],"concatenation":[134],"combine":[136,173],"To":[138],"solve":[139],"problem,":[141],"adopt":[143],"Multivariate":[145],"Residual":[146],"Module":[147],"(MRM)":[148],"integrate":[150,162,216],"Meanwhile,":[154],"Vertical":[159],"MRM":[160,171],"same":[166],"category,":[167],"Horizontal":[170],"categories,":[178],"can":[180],"balance":[181],"contribution":[183],"co-attention.":[191],"contrast":[193],"methods,":[197],"BCAN":[199,245],"able":[201],"obtain":[203],"complementary":[204],"via":[210,219],"improved":[221],"multivariate":[222],"residual":[223],"strategy.":[224],"We":[225],"conduct":[226],"series":[228],"experiments":[230],"two":[232],"benchmark":[233],"datasets":[234],"(MSCOCO":[235],"Flickr30k),":[237],"results":[240],"indicate":[241],"proposed":[244],"achieves":[246],"superior":[248],"performance.":[249]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":10}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
