{"id":"https://openalex.org/W2953461088","doi":"https://doi.org/10.1109/tmm.2019.2924576","title":"STAT: Spatial-Temporal Attention Mechanism for Video Captioning","display_name":"STAT: Spatial-Temporal Attention Mechanism for Video Captioning","publication_year":2019,"publication_date":"2019-06-25","ids":{"openalex":"https://openalex.org/W2953461088","doi":"https://doi.org/10.1109/tmm.2019.2924576","mag":"2953461088"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2019.2924576","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2019.2924576","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054311881","display_name":"Chenggang Yan","orcid":"https://orcid.org/0000-0003-1204-0512"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chenggang Yan","raw_affiliation_strings":["Institute of Information and Control, Hangzhou Dianzi University, Hangzhou, China","School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information and Control, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]},{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021557044","display_name":"Yunbin Tu","orcid":"https://orcid.org/0000-0002-9525-9060"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunbin Tu","raw_affiliation_strings":["Institute of Information and Control, Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information and Control, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103030459","display_name":"Xingzheng Wang","orcid":"https://orcid.org/0000-0003-4080-6888"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingzheng Wang","raw_affiliation_strings":["College of Mechatronics and Control Engineering, Shenzhen University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"College of Mechatronics and Control Engineering, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101653272","display_name":"Yongbing Zhang","orcid":"https://orcid.org/0009-0008-1568-3728"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongbing Zhang","raw_affiliation_strings":["Graduate School at Shenzhen, Tsinghua University, Shenzhen, China","School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Graduate School at Shenzhen, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067521894","display_name":"Xinhong Hao","orcid":"https://orcid.org/0000-0002-6448-4839"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinhong Hao","raw_affiliation_strings":["Science and Technology on Mechatronic Dynamic Control Laboratory, Beijing Institute of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Science and Technology on Mechatronic Dynamic Control Laboratory, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046305086","display_name":"Yongdong Zhang","orcid":"https://orcid.org/0000-0002-1151-1792"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["Graduate School at Shenzhen, Tsinghua University, Shenzhen, China","School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Graduate School at Shenzhen, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080722708","display_name":"Qionghai Dai","orcid":"https://orcid.org/0000-0001-7043-3061"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qionghai Dai","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5054311881"],"corresponding_institution_ids":["https://openalex.org/I126520041","https://openalex.org/I50760025"],"apc_list":null,"apc_paid":null,"fwci":25.4396,"has_fulltext":false,"cited_by_count":403,"citation_normalized_percentile":{"value":0.99651628,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"22","issue":"1","first_page":"229","last_page":"241"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9184736013412476},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9129961729049683},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.613149881362915},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5877492427825928},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.5327689051628113},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5250893831253052},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.44281673431396484},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.421505331993103},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41644108295440674},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.13232466578483582}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9184736013412476},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9129961729049683},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.613149881362915},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5877492427825928},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.5327689051628113},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5250893831253052},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.44281673431396484},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.421505331993103},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41644108295440674},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.13232466578483582},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2019.2924576","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2019.2924576","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.47999998927116394}],"awards":[{"id":"https://openalex.org/G5144403962","display_name":null,"funder_award_id":"2017YFC0820604","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5258641124","display_name":null,"funder_award_id":"2017YFC0820605","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G6023223561","display_name":null,"funder_award_id":"2017YFC0820600","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G6502399670","display_name":null,"funder_award_id":"2019041","funder_id":"https://openalex.org/F4320325571","funder_display_name":"Shenzhen University"},{"id":"https://openalex.org/G8087118743","display_name":null,"funder_award_id":"61671196","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8290336390","display_name":null,"funder_award_id":"61525206","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8790782256","display_name":null,"funder_award_id":"61701149","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320325571","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W318042436","https://openalex.org/W639708223","https://openalex.org/W1514535095","https://openalex.org/W1522734439","https://openalex.org/W1569095176","https://openalex.org/W1573040851","https://openalex.org/W1586939924","https://openalex.org/W1889081078","https://openalex.org/W1895577753","https://openalex.org/W1930223417","https://openalex.org/W1956340063","https://openalex.org/W2016053056","https://openalex.org/W2046670074","https://openalex.org/W2097117768","https://openalex.org/W2101105183","https://openalex.org/W2102605133","https://openalex.org/W2109463015","https://openalex.org/W2113758732","https://openalex.org/W2121063440","https://openalex.org/W2133459682","https://openalex.org/W2145020494","https://openalex.org/W2163424005","https://openalex.org/W2163917317","https://openalex.org/W2164290393","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2302086703","https://openalex.org/W2425121537","https://openalex.org/W2501167881","https://openalex.org/W2507365558","https://openalex.org/W2519255234","https://openalex.org/W2523937091","https://openalex.org/W2523993696","https://openalex.org/W2527349934","https://openalex.org/W2584992898","https://openalex.org/W2735673432","https://openalex.org/W2737030113","https://openalex.org/W2739107216","https://openalex.org/W2754689878","https://openalex.org/W2760998155","https://openalex.org/W2762383441","https://openalex.org/W2765658575","https://openalex.org/W2786585376","https://openalex.org/W2807722546","https://openalex.org/W2808203533","https://openalex.org/W2887712318","https://openalex.org/W2905172366","https://openalex.org/W2963152432","https://openalex.org/W2963465031","https://openalex.org/W2963576560","https://openalex.org/W2963843052","https://openalex.org/W2964241990","https://openalex.org/W3099677434","https://openalex.org/W4294650876","https://openalex.org/W6600284362","https://openalex.org/W6630875275","https://openalex.org/W6639432524","https://openalex.org/W6684090549","https://openalex.org/W6687037952"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W2963177403","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W4289422896"],"abstract_inverted_index":{"Video":[0],"captioning":[1],"refers":[2],"to":[3,33,71,115],"automatic":[4],"generate":[5],"natural":[6],"language":[7],"sentences,":[8],"which":[9],"summarize":[10],"the":[11,16,50,77,102,113,118,122,149],"video":[12,31,92],"contents.":[13],"Inspired":[14],"by":[15],"visual":[17],"attention":[18,24,46,60,83],"mechanism":[19,25,47,61,84],"of":[20,52,79],"human":[21],"beings,":[22],"temporal":[23,45,59,105,125],"has":[26],"been":[27],"widely":[28],"used":[29],"in":[30,67,107,121],"description":[32],"selectively":[34],"focus":[35],"on":[36,44,134],"important":[37],"frames.":[38,68],"However,":[39],"most":[40,123],"existing":[41],"methods":[42],"based":[43],"suffer":[48],"from":[49],"problems":[51],"recognition":[53],"error":[54],"and":[55,104,139,159],"detail":[56],"missing,":[57],"because":[58],"cannot":[62],"further":[63],"catch":[64],"significant":[65,119],"regions":[66,120],"In":[69],"order":[70],"address":[72],"above":[73],"problems,":[74],"we":[75],"propose":[76],"use":[78],"a":[80,108],"novel":[81],"spatial-temporal":[82],"(STAT)":[85],"within":[86],"an":[87],"encoder-decoder":[88],"neural":[89],"network":[90],"for":[91,127],"captioning.":[93],"The":[94],"proposed":[95,146],"STAT":[96,133,147],"successfully":[97],"takes":[98],"into":[99],"account":[100],"both":[101],"spatial":[103],"structures":[106],"video,":[109],"so":[110],"it":[111],"makes":[112],"decoder":[114],"automatically":[116],"select":[117],"relevant":[124],"segments":[126],"word":[128],"prediction.":[129],"We":[130],"evaluate":[131],"our":[132,145],"two":[135],"well-known":[136],"benchmarks:":[137],"MSVD":[138],"MSR-VTT-10K.":[140],"Experimental":[141],"results":[142],"show":[143],"that":[144],"achieves":[148],"state-of-the-art":[150],"performance":[151],"with":[152],"several":[153],"popular":[154],"evaluation":[155],"metrics:":[156],"BLEU-4,":[157],"METEOR,":[158],"CIDEr.":[160]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":39},{"year":2024,"cited_by_count":38},{"year":2023,"cited_by_count":73},{"year":2022,"cited_by_count":60},{"year":2021,"cited_by_count":73},{"year":2020,"cited_by_count":84},{"year":2019,"cited_by_count":32}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
