{"id":"https://openalex.org/W4286212078","doi":"https://doi.org/10.1109/tnnls.2022.3190367","title":"An Effective Video Transformer With Synchronized Spatiotemporal and Spatial Self-Attention for Action Recognition","display_name":"An Effective Video Transformer With Synchronized Spatiotemporal and Spatial Self-Attention for Action Recognition","publication_year":2022,"publication_date":"2022-07-20","ids":{"openalex":"https://openalex.org/W4286212078","doi":"https://doi.org/10.1109/tnnls.2022.3190367","pmid":"https://pubmed.ncbi.nlm.nih.gov/35857731"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2022.3190367","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2022.3190367","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042097351","display_name":"Saghir Alfasly","orcid":"https://orcid.org/0000-0002-8561-660X"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I4210142539","display_name":"Guangdong Institute of Intelligent Manufacturing","ror":"https://ror.org/049jpjz09","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210142539"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Saghir Alfasly","raw_affiliation_strings":["Shenzhen Key Laboratory of Advanced Machine Learning and Applications, College of Mathematics and Statistics, Shenzhen University, Shenzhen, China","Guangdong Key Laboratory of Intelligent Information Processing, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-8561-660X","affiliations":[{"raw_affiliation_string":"Shenzhen Key Laboratory of Advanced Machine Learning and Applications, College of Mathematics and Statistics, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Guangdong Key Laboratory of Intelligent Information Processing, Shenzhen, China","institution_ids":["https://openalex.org/I4210142539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103210224","display_name":"Charles K. Chui","orcid":"https://orcid.org/0000-0003-2482-8980"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Charles K. Chui","raw_affiliation_strings":["Department of Statistics, Stanford University, Stanford, CA, USA"],"raw_orcid":"https://orcid.org/0000-0003-2482-8980","affiliations":[{"raw_affiliation_string":"Department of Statistics, Stanford University, Stanford, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027322245","display_name":"Qingtang Jiang","orcid":"https://orcid.org/0000-0002-0173-9988"},"institutions":[{"id":"https://openalex.org/I208333798","display_name":"University of Missouri\u2013St. Louis","ror":"https://ror.org/037cnag11","country_code":"US","type":"education","lineage":["https://openalex.org/I208333798"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qingtang Jiang","raw_affiliation_strings":["Department of Mathematics and Statistics, University of Missouri&#x2013;St. Louis, St. Louis, MO, USA"],"raw_orcid":"https://orcid.org/0000-0002-0173-9988","affiliations":[{"raw_affiliation_string":"Department of Mathematics and Statistics, University of Missouri&#x2013;St. Louis, St. Louis, MO, USA","institution_ids":["https://openalex.org/I208333798"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066196985","display_name":"Jian L\u00fc","orcid":"https://orcid.org/0000-0003-4599-7281"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I200296433","display_name":"Chinese Academy of Medical Sciences & Peking Union Medical College","ror":"https://ror.org/02drdmm93","country_code":"CN","type":"education","lineage":["https://openalex.org/I200296433"]},{"id":"https://openalex.org/I4210112812","display_name":"National Supercomputing Center in Shenzhen","ror":"https://ror.org/02291hh73","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210112812"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Lu","raw_affiliation_strings":["Shenzhen Key Laboratory of Advanced Machine Learning and Applications, College of Mathematics and Statistics, Shenzhen University, Shenzhen, China","National Center for Applied Mathematics Shenzhen (NCAMS), Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-4599-7281","affiliations":[{"raw_affiliation_string":"Shenzhen Key Laboratory of Advanced Machine Learning and Applications, College of Mathematics and Statistics, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"National Center for Applied Mathematics Shenzhen (NCAMS), Shenzhen, China","institution_ids":["https://openalex.org/I4210112812","https://openalex.org/I200296433"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100385730","display_name":"Xu Chen","orcid":"https://orcid.org/0000-0002-7299-3238"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I4210142539","display_name":"Guangdong Institute of Intelligent Manufacturing","ror":"https://ror.org/049jpjz09","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210142539"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chen Xu","raw_affiliation_strings":["Shenzhen Key Laboratory of Advanced Machine Learning and Applications, College of Mathematics and Statistics, Shenzhen University, Shenzhen, China","Guangdong Key Laboratory of Intelligent Information Processing, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen Key Laboratory of Advanced Machine Learning and Applications, College of Mathematics and Statistics, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Guangdong Key Laboratory of Intelligent Information Processing, Shenzhen, China","institution_ids":["https://openalex.org/I4210142539"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5042097351"],"corresponding_institution_ids":["https://openalex.org/I180726961","https://openalex.org/I4210142539"],"apc_list":null,"apc_paid":null,"fwci":4.1837,"has_fulltext":false,"cited_by_count":44,"citation_normalized_percentile":{"value":0.9536154,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"35","issue":"2","first_page":"2496","last_page":"2509"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7502961158752441},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6524457931518555},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5886940360069275},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5690981149673462},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.504703164100647},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5010781288146973},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33418935537338257},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1092105507850647}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7502961158752441},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6524457931518555},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5886940360069275},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5690981149673462},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.504703164100647},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5010781288146973},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33418935537338257},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1092105507850647},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2022.3190367","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2022.3190367","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:35857731","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35857731","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1795570172","display_name":null,"funder_award_id":"2020B1515310008","funder_id":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province"},{"id":"https://openalex.org/G3105280189","display_name":null,"funder_award_id":"U21A20455","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3338363611","display_name":null,"funder_award_id":"61872429","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4630108110","display_name":null,"funder_award_id":"11871348","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6305693123","display_name":null,"funder_award_id":"W911NF2110218","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G7397269034","display_name":null,"funder_award_id":"353185","funder_id":"https://openalex.org/F4320306164","funder_display_name":"Simons Foundation"},{"id":"https://openalex.org/G8017764705","display_name":null,"funder_award_id":"61972265","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320306164","display_name":"Simons Foundation","ror":"https://ror.org/01cmst727"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321921","display_name":"Natural Science Foundation of Guangdong Province","ror":null},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W24089286","https://openalex.org/W153185079","https://openalex.org/W787785461","https://openalex.org/W2126579184","https://openalex.org/W2136853139","https://openalex.org/W2506886870","https://openalex.org/W2507009361","https://openalex.org/W2524365899","https://openalex.org/W2625366777","https://openalex.org/W2739179646","https://openalex.org/W2746007478","https://openalex.org/W2772805989","https://openalex.org/W2782776028","https://openalex.org/W2900585237","https://openalex.org/W2903691278","https://openalex.org/W2942750779","https://openalex.org/W2951906878","https://openalex.org/W2962722475","https://openalex.org/W2962843773","https://openalex.org/W2963524571","https://openalex.org/W2971380191","https://openalex.org/W2978927732","https://openalex.org/W2990152177","https://openalex.org/W2990503944","https://openalex.org/W3002181557","https://openalex.org/W3007751154","https://openalex.org/W3025388795","https://openalex.org/W3034572008","https://openalex.org/W3035104321","https://openalex.org/W3035303837","https://openalex.org/W3035422918","https://openalex.org/W3035619757","https://openalex.org/W3049411219","https://openalex.org/W3080358492","https://openalex.org/W3096833468","https://openalex.org/W3123784868","https://openalex.org/W3125082963","https://openalex.org/W3138516171","https://openalex.org/W3146918276","https://openalex.org/W3151130473","https://openalex.org/W3193810785","https://openalex.org/W4214516465","https://openalex.org/W4214555767","https://openalex.org/W4214612132","https://openalex.org/W4214614183","https://openalex.org/W4312560592","https://openalex.org/W6600983433","https://openalex.org/W6622789128","https://openalex.org/W6682864246","https://openalex.org/W6728047685","https://openalex.org/W6739901393","https://openalex.org/W6766978945","https://openalex.org/W6780226713","https://openalex.org/W6790307280","https://openalex.org/W6797206543","https://openalex.org/W6797263693","https://openalex.org/W6797737728"],"related_works":["https://openalex.org/W4293226380","https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W2142795561","https://openalex.org/W4205302943","https://openalex.org/W2561132942","https://openalex.org/W4321487865","https://openalex.org/W3155418658","https://openalex.org/W4243199227"],"abstract_inverted_index":{"Convolutional":[0],"neural":[1,10],"networks":[2],"(CNNs)":[3],"have":[4,28,44,58,221],"come":[5],"to":[6,61,78,103,111,172,182],"dominate":[7],"vision-based":[8],"deep":[9],"network":[11],"structures":[12],"in":[13,33,51,156,160,232],"both":[14],"image":[15,34],"and":[16,40,85,92,127,138,224,247],"video":[17,42,63,70,105,108,194,214,236],"models":[18,32,84],"over":[19],"the":[20,48,79,88,93,133,147,174,178,183,204],"past":[21],"decade.":[22],"However,":[23],"convolution-free":[24],"vision":[25],"Transformers":[26,43,64,68,73,86],"(ViTs)":[27],"recently":[29],"outperformed":[30],"CNN-based":[31,83],"recognition.":[35],"Despite":[36],"this":[37,97,233],"progress,":[38],"building":[39],"designing":[41],"not":[45],"yet":[46],"obtained":[47],"same":[49],"attention":[50,122,129,150],"research":[52],"as":[53,211],"image-based":[54,67],"Transformers.":[55,109],"While":[56],"there":[57],"been":[59,222],"attempts":[60],"build":[62],"by":[65,151],"adapting":[66],"for":[69],"understanding,":[71],"these":[72],"still":[74],"lack":[75],"efficiency":[76],"due":[77],"large":[80],"gap":[81],"between":[82],"regarding":[87],"number":[89],"of":[90,177,203,229],"parameters":[91],"training":[94],"settings.":[95],"In":[96],"work,":[98],"we":[99,117],"propose":[100,118],"three":[101],"techniques":[102,220],"improve":[104],"understanding":[106],"with":[107,136,191,226],"First,":[110],"derive":[112],"better":[113],"spatiotemporal":[114,121,126,134],"feature":[115],"representation,":[116],"a":[119,166,192,197,227],"new":[120],"scheme,":[123],"termed":[124],"synchronized":[125],"spatial":[128,139,149,153],"(SSTSA),":[130],"which":[131,187],"derives":[132],"features":[135],"temporal":[137],"multiheaded":[140],"self-attention":[141,154],"(MSA)":[142],"modules.":[143],"It":[144],"also":[145],"preserves":[146],"best":[148],"another":[152],"module":[155,169],"parallel,":[157],"thereby":[158],"resulting":[159],"an":[161,212],"effective":[162,213],"Transformer":[163,237],"encoder.":[164],"Second,":[165],"motion":[167,176],"spotlighting":[168],"is":[170,188,207],"proposed":[171,208,219],"embed":[173],"short-term":[175],"consecutive":[179],"input":[180,205],"frames":[181],"regular":[184],"RGB":[185],"input,":[186],"then":[189],"processed":[190],"single-stream":[193],"Transformer.":[195],"Third,":[196],"simple":[198],"intraclass":[199],"frame":[200],"interlacing":[201],"method":[202],"clips":[206],"that":[209],"serves":[210],"augmentation":[215],"method.":[216],"Finally,":[217],"our":[218],"evaluated":[223],"validated":[225],"set":[228],"extensive":[230],"experiments":[231],"study.":[234],"Our":[235],"outperforms":[238],"its":[239],"previous":[240],"counterparts":[241],"on":[242],"two":[243],"well-known":[244],"datasets,":[245],"Kinetics400":[246],"Something-Something-v2.":[248]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
