{"id":"https://openalex.org/W4286488082","doi":"https://doi.org/10.1145/3550276","title":"Semantic Embedding Guided Attention with Explicit Visual Feature Fusion for Video Captioning","display_name":"Semantic Embedding Guided Attention with Explicit Visual Feature Fusion for Video Captioning","publication_year":2022,"publication_date":"2022-07-22","ids":{"openalex":"https://openalex.org/W4286488082","doi":"https://doi.org/10.1145/3550276"},"language":"en","primary_location":{"id":"doi:10.1145/3550276","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3550276","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004858240","display_name":"Shan-Shan Dong","orcid":"https://orcid.org/0000-0002-2500-9488"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shanshan Dong","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049500421","display_name":"Tian-Zi Niu","orcid":"https://orcid.org/0000-0002-7389-5883"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianzi Niu","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052168662","display_name":"Xin Luo","orcid":"https://orcid.org/0000-0002-6901-5476"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Luo","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068917997","display_name":"Wu Liu","orcid":"https://orcid.org/0000-0003-1633-7575"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wu Liu","raw_affiliation_strings":["JD AI Research, Beijing, China"],"affiliations":[{"raw_affiliation_string":"JD AI Research, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086235570","display_name":"Xin-Shun Xu","orcid":"https://orcid.org/0000-0001-9972-7370"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinshun Xu","raw_affiliation_strings":["School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5004858240"],"corresponding_institution_ids":["https://openalex.org/I154099455"],"apc_list":null,"apc_paid":null,"fwci":1.1083,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.78439375,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"19","issue":"2","first_page":"1","last_page":"18"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8810923099517822},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8074670433998108},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.7447299361228943},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6393229365348816},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6042045950889587},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5577107667922974},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5533666610717773},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.49920654296875},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.49647361040115356},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.4638986587524414},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.22425681352615356},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.08823329210281372}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8810923099517822},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8074670433998108},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.7447299361228943},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6393229365348816},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6042045950889587},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5577107667922974},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5533666610717773},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.49920654296875},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.49647361040115356},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.4638986587524414},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.22425681352615356},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.08823329210281372},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3550276","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3550276","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1658115842","display_name":null,"funder_award_id":"62172256 and 61872428","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5877029504","display_name":null,"funder_award_id":"ZR2019ZD06","funder_id":"https://openalex.org/F4320324174","funder_display_name":"Natural Science Foundation of Shandong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320324174","display_name":"Natural Science Foundation of Shandong Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2117539524","https://openalex.org/W2133459682","https://openalex.org/W2607326921","https://openalex.org/W2739107216","https://openalex.org/W2808203533","https://openalex.org/W2897927512","https://openalex.org/W2945223572","https://openalex.org/W2964057271","https://openalex.org/W2996817764","https://openalex.org/W3162529291","https://openalex.org/W7025119557"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W1968265719"],"abstract_inverted_index":{"Video":[0],"captioning,":[1],"which":[2,92,135],"bridges":[3],"vision":[4],"and":[5,19,24,40,58,110,183,207],"language,":[6],"is":[7,27,49,155,186],"a":[8,50,73,124,143],"fundamental":[9],"yet":[10],"challenging":[11],"task":[12,64],"in":[13,91,117,148,219],"computer":[14],"vision.":[15],"To":[16,67,191],"generate":[17,142],"accurate":[18],"comprehensive":[20],"sentences,":[21],"both":[22],"visual":[23,55,82,108,113,169,182],"semantic":[25,52,59,76,129,151,179,184],"information":[26,154],"quite":[28],"important.":[29],"However,":[30],"most":[31,167],"existing":[32],"methods":[33],"simply":[34],"concatenate":[35],"different":[36],"types":[37],"of":[38,115,221],"features":[39,114,170],"ignore":[41],"the":[42,63,103,138,150,159,166,178,193],"interactions":[43,105],"between":[44,54,106,181],"them.":[45],"In":[46,175],"addition,":[47],"there":[48],"large":[51],"gap":[53,180],"feature":[56],"space":[57,185],"embedding":[60,77,130,153],"space,":[61],"making":[62],"very":[65],"challenging.":[66],"address":[68],"these":[69],"issues,":[70],"we":[71,93,122,196],"propose":[72,123],"framework":[74],"named":[75],"guided":[78,131],"attention":[79,126,132,140,145,164],"with":[80,137],"Explicit":[81],"Feature":[83],"Fusion":[84],"for":[85,89],"vidEo":[86],"CapTioning,":[87],"EFFECT":[88],"short,":[90],"design":[94],"an":[95,118],"explicit":[96,119],"visual-feature":[97],"fusion":[98],"(EVF)":[99],"scheme":[100],"to":[101,141,157,161,165,188],"capture":[102],"pairwise":[104],"multiple":[107],"modalities":[109],"fuse":[111],"multimodal":[112],"videos":[116],"way.":[120],"Furthermore,":[121],"novel":[125],"mechanism":[127],"called":[128],"(SEGA":[133],"),":[134],"cooperates":[136],"temporal":[139],"joint":[144],"map.":[146],"Specifically,":[147],"SEGA,":[149],"word":[152],"leveraged":[156],"guide":[158],"model":[160],"pay":[162],"more":[163],"correlated":[168],"at":[171],"each":[172],"decoding":[173],"stage.":[174],"this":[176],"way,":[177],"alleviated":[187],"some":[189],"extent.":[190],"evaluate":[192],"proposed":[194],"model,":[195],"conduct":[197],"extensive":[198],"experiments":[199],"on":[200],"two":[201],"widely":[202],"used":[203],"datasets,":[204],"i.e.,":[205],"MSVD":[206],"MSR-VTT.":[208],"The":[209],"experimental":[210],"results":[211,218],"demonstrate":[212],"that":[213],"our":[214],"approach":[215],"achieves":[216],"state-of-the-art":[217],"terms":[220],"four":[222],"evaluation":[223],"metrics.":[224]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
