{"id":"https://openalex.org/W3161079477","doi":"https://doi.org/10.1109/icassp39728.2021.9413691","title":"Cascade Attention Fusion for Fine-Grained Image Captioning Based on Multi-Layer LSTM","display_name":"Cascade Attention Fusion for Fine-Grained Image Captioning Based on Multi-Layer LSTM","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3161079477","doi":"https://doi.org/10.1109/icassp39728.2021.9413691","mag":"3161079477"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413691","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413691","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065660222","display_name":"Shuang Wang","orcid":"https://orcid.org/0000-0003-4940-1211"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuang Wang","raw_affiliation_strings":["Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051828914","display_name":"Yun Meng","orcid":"https://orcid.org/0000-0002-2317-9379"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yun Meng","raw_affiliation_strings":["Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100749025","display_name":"Yu Gu","orcid":"https://orcid.org/0000-0003-3634-2275"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Gu","raw_affiliation_strings":["Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100731323","display_name":"Lei Zhang","orcid":"https://orcid.org/0000-0001-5797-5480"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Zhang","raw_affiliation_strings":["Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091541090","display_name":"Xiutiao Ye","orcid":"https://orcid.org/0009-0002-0088-606X"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiutiao Ye","raw_affiliation_strings":["Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077593544","display_name":"Jingxian Tian","orcid":null},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingxian Tian","raw_affiliation_strings":["Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050630882","display_name":"Licheng Jiao","orcid":"https://orcid.org/0000-0003-3354-9617"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Licheng Jiao","raw_affiliation_strings":["Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"Key Laboratory of Intelligent Perception and Image Understanding of Ministry of Education, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I149594827"],"apc_list":null,"apc_paid":null,"fwci":0.4852,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.64112487,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2245","last_page":"2249"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9822301864624023},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.8548778295516968},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8402904868125916},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7709058523178101},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6791049242019653},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6361582279205322},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.5356462001800537},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4661945402622223},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44658592343330383},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.419175386428833},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39867299795150757},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.33990275859832764},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.31616201996803284}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9822301864624023},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.8548778295516968},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8402904868125916},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7709058523178101},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6791049242019653},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6361582279205322},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.5356462001800537},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4661945402622223},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44658592343330383},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.419175386428833},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39867299795150757},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33990275859832764},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31616201996803284},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413691","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413691","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7300000190734863,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1773149199","https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2133459682","https://openalex.org/W2154652894","https://openalex.org/W2250539671","https://openalex.org/W2302086703","https://openalex.org/W2506483933","https://openalex.org/W2575842049","https://openalex.org/W2613718673","https://openalex.org/W2745461083","https://openalex.org/W2754927243","https://openalex.org/W2774267535","https://openalex.org/W2889924266","https://openalex.org/W2904551248","https://openalex.org/W2907279673","https://openalex.org/W2938603906","https://openalex.org/W2962760898","https://openalex.org/W2963084599","https://openalex.org/W2963499204","https://openalex.org/W2965846473","https://openalex.org/W6620707391","https://openalex.org/W6639102338","https://openalex.org/W6682631176","https://openalex.org/W6747225742","https://openalex.org/W6761185680","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W3161079477"],"abstract_inverted_index":{"The":[0],"conventional":[1],"visual":[2,47,78,84],"attention-based":[3],"image":[4,9,61,116],"captioning":[5,117],"approaches":[6],"typically":[7],"use":[8],"information":[10],"to":[11,20,58,103,119],"guide":[12],"caption":[13],"generation.":[14],"Results":[15],"from":[16],"these":[17],"models":[18],"tend":[19],"be":[21],"coarse":[22],"and":[23,34,48,68,114,123,143],"ignore":[24],"the":[25,28,35,77,86,96,124,140,149],"details":[26],"in":[27],"image,":[29],"such":[30],"as":[31],"objects,":[32,66],"attributes":[33,67],"distinguishing":[36,70],"aspects":[37,71],"of":[38,72],"each":[39],"image.":[40],"In":[41,74],"this":[42],"paper,":[43],"we":[44],"propose":[45],"a":[46,53,135,144],"semantic":[49,87,93],"fusion":[50],"network":[51],"with":[52],"margin-based":[54,98],"training":[55],"guidance":[56],"mechanism":[57],"generate":[59],"fine":[60],"descriptions":[62],"that":[63],"depict":[64],"more":[65,82,91,105],"other":[69],"images.":[73],"our":[75,101,121],"model,":[76],"attention":[79,88],"layer":[80,89],"introduces":[81],"low-level":[83],"information,":[85],"provides":[90],"high-level":[92],"attributes.":[94],"Furthermore,":[95],"proposed":[97],"loss":[99],"encourages":[100],"model":[102],"produce":[104],"discriminative":[106],"descriptions.":[107],"Extensive":[108],"experiments":[109],"are":[110],"conducted":[111],"on":[112,139,148],"COCO":[113],"Flickr30K":[115,141],"datasets":[118],"validate":[120],"method,":[122],"results":[125],"show":[126],"its":[127],"superior":[128],"performance":[129],"at":[130],"captioning.":[131],"Our":[132],"method":[133],"achieves":[134],"state-of-the-art":[136],"70.6":[137],"CIDEr-D":[138,147],"dataset,":[142],"competitive":[145],"123.5":[146],"MS-COCO":[150],"dataset.":[151]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
