{"id":"https://openalex.org/W4323662761","doi":"https://doi.org/10.1145/3587252","title":"Video Captioning by Learning from Global Sentence and Looking Ahead","display_name":"Video Captioning by Learning from Global Sentence and Looking Ahead","publication_year":2023,"publication_date":"2023-03-09","ids":{"openalex":"https://openalex.org/W4323662761","doi":"https://doi.org/10.1145/3587252"},"language":"en","primary_location":{"id":"doi:10.1145/3587252","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3587252","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049500421","display_name":"Tian-Zi Niu","orcid":"https://orcid.org/0000-0002-7389-5883"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tian-Zi Niu","raw_affiliation_strings":["The School of Software, Shandong University, China"],"affiliations":[{"raw_affiliation_string":"The School of Software, Shandong University, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021275163","display_name":"Zhen-Duo Chen","orcid":"https://orcid.org/0000-0002-3481-4892"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen-Duo Chen","raw_affiliation_strings":["The School of Software, Shandong University, China"],"affiliations":[{"raw_affiliation_string":"The School of Software, Shandong University, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052168662","display_name":"Xin Luo","orcid":"https://orcid.org/0000-0002-6901-5476"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Luo","raw_affiliation_strings":["The School of Software, Shandong University, China"],"affiliations":[{"raw_affiliation_string":"The School of Software, Shandong University, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100354128","display_name":"Peng-Fei Zhang","orcid":"https://orcid.org/0000-0002-6790-2098"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Peng-Fei Zhang","raw_affiliation_strings":["The School of Information Technology and Electrical Engineering, The University of Queensland, Australia"],"affiliations":[{"raw_affiliation_string":"The School of Information Technology and Electrical Engineering, The University of Queensland, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078170935","display_name":"Zi Huang","orcid":"https://orcid.org/0000-0002-9738-4949"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zi Huang","raw_affiliation_strings":["The School of Information Technology and Electrical Engineering, The University of Queensland, Australia"],"affiliations":[{"raw_affiliation_string":"The School of Information Technology and Electrical Engineering, The University of Queensland, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086235570","display_name":"Xin-Shun Xu","orcid":"https://orcid.org/0000-0001-9972-7370"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin-Shun Xu","raw_affiliation_strings":["The School of Software, Shandong University, China"],"affiliations":[{"raw_affiliation_string":"The School of Software, Shandong University, China","institution_ids":["https://openalex.org/I154099455"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5049500421"],"corresponding_institution_ids":["https://openalex.org/I154099455"],"apc_list":null,"apc_paid":null,"fwci":0.8609,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.74436329,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"19","issue":"5s","first_page":"1","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.984499990940094,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9778479337692261},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8469969034194946},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.7752818465232849},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6040850877761841},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5257074236869812},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5174713730812073},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5152347683906555},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5032088160514832},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4656572937965393},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4496208429336548},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.15625876188278198},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.088711678981781}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9778479337692261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8469969034194946},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.7752818465232849},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6040850877761841},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5257074236869812},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5174713730812073},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5152347683906555},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5032088160514832},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4656572937965393},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4496208429336548},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.15625876188278198},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.088711678981781},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3587252","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3587252","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.699999988079071,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G3312007031","display_name":null,"funder_award_id":"62172256, 62202278, 62202272","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5877029504","display_name":null,"funder_award_id":"ZR2019ZD06","funder_id":"https://openalex.org/F4320324174","funder_display_name":"Natural Science Foundation of Shandong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320324174","display_name":"Natural Science Foundation of Shandong Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W1485009520","https://openalex.org/W1573040851","https://openalex.org/W1586939924","https://openalex.org/W1901129140","https://openalex.org/W1956340063","https://openalex.org/W2139501017","https://openalex.org/W2142900973","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2425121537","https://openalex.org/W2739107216","https://openalex.org/W2750779823","https://openalex.org/W2808203533","https://openalex.org/W2905145027","https://openalex.org/W2948358897","https://openalex.org/W2951390634","https://openalex.org/W2962681491","https://openalex.org/W2962934715","https://openalex.org/W2962958773","https://openalex.org/W2962990649","https://openalex.org/W2963084599","https://openalex.org/W2963177403","https://openalex.org/W2963524571","https://openalex.org/W2963843052","https://openalex.org/W2964350391","https://openalex.org/W2970641574","https://openalex.org/W2984862483","https://openalex.org/W2989322838","https://openalex.org/W2990964949","https://openalex.org/W2996817764","https://openalex.org/W3000303105","https://openalex.org/W3009192917","https://openalex.org/W3009960797","https://openalex.org/W3034221024","https://openalex.org/W3035365026","https://openalex.org/W3035372819","https://openalex.org/W3035392611","https://openalex.org/W3039060838","https://openalex.org/W3082436432","https://openalex.org/W3093309458","https://openalex.org/W3103237788","https://openalex.org/W3131045228","https://openalex.org/W3134875898","https://openalex.org/W3163971663","https://openalex.org/W3176425931","https://openalex.org/W3176514808","https://openalex.org/W3176689360","https://openalex.org/W3181186176","https://openalex.org/W3194782062","https://openalex.org/W3211865849","https://openalex.org/W4213031069","https://openalex.org/W4213152894","https://openalex.org/W4214931354","https://openalex.org/W4281560470","https://openalex.org/W4284692156","https://openalex.org/W4286488082","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6772619176"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W3217195652","https://openalex.org/W2944691285"],"abstract_inverted_index":{"Video":[0],"captioning":[1,47,144],"aims":[2],"to":[3,27,88,141,168,214],"automatically":[4],"generate":[5],"natural":[6],"language":[7,90],"sentences":[8,95],"describing":[9],"the":[10,30,105,108,113,122,170,175,195,198,208],"content":[11],"of":[12,33,63,104,107,177,197,210],"a":[13,44,66,71,79,119,147,151,164],"video.":[14],"Although":[15],"encoder-decoder-based":[16],"models":[17],"have":[18,118],"achieved":[19],"promising":[20],"progress,":[21],"it":[22],"is":[23,78],"still":[24],"very":[25],"challenging":[26],"effectively":[28,101],"model":[29,48,93,213],"linguistic":[31],"behavior":[32],"humans":[34],"in":[35,201],"generating":[36,112],"video":[37,46,143,202],"captions.":[38],"In":[39,155,204],"this":[40],"paper,":[41],"we":[42,157,206],"propose":[43,159],"novel":[45,80],"by":[49],"learning":[50],"from":[51],"gLobal":[52],"sEntence":[53],"and":[54,70,92,129,137,163,192],"looking":[55],"AheaD,":[56],"LEAD":[57,61,131,184],"for":[58],"short.":[59],"Specifically,":[60],"consists":[62],"two":[64],"modules:":[65],"Vision":[67],"Module":[68,73],"(VM)":[69],"Language":[72],"(LM)":[74],".":[75],"Thereinto,":[76],"VM":[77,128],"attention":[81],"network,":[82],"which":[83,172],"can":[84,98,132,173],"map":[85],"visual":[86],"features":[87],"high-level":[89],"space":[91],"entire":[94],"explicitly.":[96],"LM":[97],"not":[99],"only":[100],"make":[102,142],"use":[103],"information":[106,136,140,178],"previous":[109],"sequence":[110],"when":[111],"current":[114],"word,":[115],"but":[116],"also":[117,158],"look":[120],"at":[121],"future":[123,138],"word.":[124],"Therefore,":[125],"based":[126],"on":[127,189],"LM,":[130],"obtain":[133],"global":[134],"sentence":[135,153],"word":[139],"more":[145],"like":[146],"fill-in-the-blank":[148],"task":[149],"than":[150],"word-by-word":[152],"generation.":[154],"addition,":[156,205],"an":[160],"autonomous":[161],"strategy":[162],"multi-stage":[165],"training":[166],"scheme":[167],"optimize":[169],"model,":[171],"mitigate":[174],"problem":[176],"leakage.":[179],"Extensive":[180],"experiments":[181],"show":[182],"that":[183],"outperforms":[185],"some":[186],"state-of-the-art":[187],"methods":[188],"MSR-VTT,":[190],"MSVD,":[191],"VATEX,":[193],"demonstrating":[194],"effectiveness":[196],"proposed":[199,212],"approach":[200],"captioning.":[203],"release":[207],"code":[209],"our":[211],"be":[215],"publicly":[216],"available.":[217],"1":[218]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
