{"id":"https://openalex.org/W4400447308","doi":"https://doi.org/10.1109/tcsvt.2024.3425598","title":"VS-TransGRU: A Novel Transformer-GRU-Based Framework Enhanced by Visual-Semantic Fusion for Egocentric Action Anticipation","display_name":"VS-TransGRU: A Novel Transformer-GRU-Based Framework Enhanced by Visual-Semantic Fusion for Egocentric Action Anticipation","publication_year":2024,"publication_date":"2024-07-09","ids":{"openalex":"https://openalex.org/W4400447308","doi":"https://doi.org/10.1109/tcsvt.2024.3425598"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3425598","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3425598","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013382259","display_name":"Congqi Cao","orcid":"https://orcid.org/0000-0002-0217-9791"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Congqi Cao","raw_affiliation_strings":["National Engineering Laboratory for Integrated Aero-Space-Ground-Ocean Big Data Application Technology, School of Computer Science, Northwestern Polytechnical University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Integrated Aero-Space-Ground-Ocean Big Data Application Technology, School of Computer Science, Northwestern Polytechnical University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108302272","display_name":"Ze Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ze Sun","raw_affiliation_strings":["National Engineering Laboratory for Integrated Aero-Space-Ground-Ocean Big Data Application Technology, School of Computer Science, Northwestern Polytechnical University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Integrated Aero-Space-Ground-Ocean Big Data Application Technology, School of Computer Science, Northwestern Polytechnical University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015584209","display_name":"Qinyi Lv","orcid":"https://orcid.org/0000-0002-5359-5701"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinyi Lv","raw_affiliation_strings":["School of Electronic and Information, Northwestern Polytechnical University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information, Northwestern Polytechnical University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058520218","display_name":"Lingtong Min","orcid":"https://orcid.org/0000-0003-3970-7823"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingtong Min","raw_affiliation_strings":["School of Electronic and Information, Northwestern Polytechnical University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information, Northwestern Polytechnical University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028235866","display_name":"Yanning Zhang","orcid":"https://orcid.org/0000-0002-2977-8057"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanning Zhang","raw_affiliation_strings":["National Engineering Laboratory for Integrated Aero-Space-Ground-Ocean Big Data Application Technology, School of Computer Science, Northwestern Polytechnical University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Integrated Aero-Space-Ground-Ocean Big Data Application Technology, School of Computer Science, Northwestern Polytechnical University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I17145004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5013382259"],"corresponding_institution_ids":["https://openalex.org/I17145004"],"apc_list":null,"apc_paid":null,"fwci":2.0994,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.88247089,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"34","issue":"11","first_page":"11605","last_page":"11618"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9498000144958496,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10648","display_name":"Virtual Reality Applications and Impacts","score":0.9161999821662903,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6415287256240845},{"id":"https://openalex.org/keywords/anticipation","display_name":"Anticipation (artificial intelligence)","score":0.6225563883781433},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5428251624107361},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5358964204788208},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.505385160446167},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.47750037908554077},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4649468958377838},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.4160687029361725},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33971530199050903},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3380487561225891},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11408597230911255},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.10769104957580566},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.07295417785644531}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6415287256240845},{"id":"https://openalex.org/C176777502","wikidata":"https://www.wikidata.org/wiki/Q4774623","display_name":"Anticipation (artificial intelligence)","level":2,"score":0.6225563883781433},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5428251624107361},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5358964204788208},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.505385160446167},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.47750037908554077},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4649468958377838},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.4160687029361725},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33971530199050903},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3380487561225891},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11408597230911255},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.10769104957580566},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.07295417785644531},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3425598","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3425598","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.5400000214576721}],"awards":[{"id":"https://openalex.org/G1774636401","display_name":null,"funder_award_id":"62301434","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G298578780","display_name":null,"funder_award_id":"62376217","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W1483019628","https://openalex.org/W2064675550","https://openalex.org/W2099614498","https://openalex.org/W2109698606","https://openalex.org/W2157331557","https://openalex.org/W2422305492","https://openalex.org/W2472970127","https://openalex.org/W2507009361","https://openalex.org/W2786807178","https://openalex.org/W2792764867","https://openalex.org/W2800438594","https://openalex.org/W2895299763","https://openalex.org/W2913636123","https://openalex.org/W2945792291","https://openalex.org/W2963524571","https://openalex.org/W2963570630","https://openalex.org/W2963692464","https://openalex.org/W2963720581","https://openalex.org/W2963945905","https://openalex.org/W2984287396","https://openalex.org/W2995301359","https://openalex.org/W3006892894","https://openalex.org/W3021065276","https://openalex.org/W3092798182","https://openalex.org/W3096383329","https://openalex.org/W3106884574","https://openalex.org/W3132567969","https://openalex.org/W3133092047","https://openalex.org/W3161260844","https://openalex.org/W3175928291","https://openalex.org/W3176051609","https://openalex.org/W3195949276","https://openalex.org/W3199862516","https://openalex.org/W3201254286","https://openalex.org/W3207426561","https://openalex.org/W3208955650","https://openalex.org/W4206654595","https://openalex.org/W4212974190","https://openalex.org/W4214555767","https://openalex.org/W4214612132","https://openalex.org/W4287778673","https://openalex.org/W4288104375","https://openalex.org/W4292794020","https://openalex.org/W4312256886","https://openalex.org/W4312281498","https://openalex.org/W4312660844","https://openalex.org/W4312688882","https://openalex.org/W4312873759","https://openalex.org/W4319299948","https://openalex.org/W4319300049","https://openalex.org/W4389299511","https://openalex.org/W4393178472","https://openalex.org/W4394593168","https://openalex.org/W4394596846","https://openalex.org/W4394625740","https://openalex.org/W6676179485","https://openalex.org/W6748310659","https://openalex.org/W6749825310","https://openalex.org/W6749916090","https://openalex.org/W6767337265","https://openalex.org/W6776598532","https://openalex.org/W6778883912","https://openalex.org/W6801505099"],"related_works":["https://openalex.org/W2952741422","https://openalex.org/W3033133102","https://openalex.org/W4225846781","https://openalex.org/W2321705977","https://openalex.org/W2885024018","https://openalex.org/W2132659060","https://openalex.org/W2031992971","https://openalex.org/W3214791684","https://openalex.org/W2353265673","https://openalex.org/W2031175860"],"abstract_inverted_index":{"Egocentric":[0],"action":[1,101,118],"anticipation":[2,49,102,119],"is":[3,111,195],"a":[4,62,68,94,162,170,240],"challenging":[5],"task":[6],"that":[7],"aims":[8],"to":[9,46,73,113,126,143,150,197],"make":[10,198],"advanced":[11],"predictions":[12],"of":[13,117,153,209,227],"future":[14,88],"actions":[15,89],"from":[16,139],"current":[17,85],"and":[18,34,42,59,80,87,99,157,169,187,204,220],"historical":[19],"observations":[20,86,142],"in":[21,104],"the":[22,31,39,48,82,115,121,128,134,140,145,155,201,207,225],"first-person":[23,217],"view.":[24],"Most":[25],"existing":[26],"methods":[27],"focus":[28],"on":[29,38,61,133,214],"improving":[30],"model":[32],"architecture":[33,179],"loss":[35],"function":[36],"based":[37,132],"visual":[40,57,141,147],"input":[41],"recurrent":[43],"neural":[44],"network":[45,64],"boost":[47],"performance.":[50],"However,":[51],"these":[52],"methods,":[53],"which":[54,231],"merely":[55],"consider":[56],"information":[58,110],"rely":[60],"single":[63],"architecture,":[65],"gradually":[66],"reach":[67],"performance":[69,116,183],"plateau.":[70],"In":[71],"order":[72],"fully":[74,205],"understand":[75],"what":[76],"has":[77],"been":[78],"observed":[79],"capture":[81],"dependencies":[83],"between":[84],"well":[90],"enough,":[91],"we":[92,160],"propose":[93,125],"novel":[95],"visual-semantic":[96,192],"fusion":[97,193],"enhanced":[98],"Transformer-GRU-based":[100],"framework":[103],"this":[105],"paper.":[106],"Firstly,":[107],"high-level":[108],"semantic":[109,129,202],"introduced":[112],"improve":[114],"for":[120,165,173,181,200],"first":[122],"time.":[123],"We":[124],"use":[127],"features":[130],"generated":[131],"class":[135],"labels":[136],"or":[137],"directly":[138],"augment":[144],"original":[146],"features.":[148],"Secondly,":[149],"take":[151],"advantage":[152],"both":[154],"parallel":[156],"autoregressive":[158],"models,":[159],"design":[161],"Transformer-based":[163],"encoder":[164],"long-term":[166],"sequential":[167],"modeling":[168],"GRU-based":[171],"decoder":[172],"flexible":[174],"iteration":[175],"decoding.":[176],"This":[177],"hybrid":[178],"allows":[180],"better":[182],"with":[184],"fewer":[185],"parameters":[186],"computations.":[188],"Thirdly,":[189],"an":[190],"effective":[191],"module":[194],"proposed":[196,229],"up":[199],"gap":[203],"utilize":[206],"complementarity":[208],"different":[210],"modalities.":[211],"Extensive":[212],"experiments":[213],"two":[215,221],"large-scale":[216],"view":[218],"datasets":[219,223],"third-person":[222],"validate":[224],"effectiveness":[226],"our":[228],"method,":[230],"achieves":[232],"new":[233],"state-of-the-art":[234],"performance,":[235],"outperforming":[236],"previous":[237],"approaches":[238],"by":[239],"large":[241],"margin.":[242],"The":[243],"code":[244],"will":[245],"be":[246],"released":[247],"after":[248],"acceptance":[249],"at":[250],"<uri":[251],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[252],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/sunze992/VS-TransGRU</uri>.":[253]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
