{"id":"https://openalex.org/W4414348622","doi":"https://doi.org/10.1109/tnnls.2025.3602089","title":"Toward an Effective Action-Region Tracking Framework for Fine-Grained Video Action Recognition","display_name":"Toward an Effective Action-Region Tracking Framework for Fine-Grained Video Action Recognition","publication_year":2025,"publication_date":"2025-09-19","ids":{"openalex":"https://openalex.org/W4414348622","doi":"https://doi.org/10.1109/tnnls.2025.3602089","pmid":"https://pubmed.ncbi.nlm.nih.gov/40971279"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2025.3602089","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3602089","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020149992","display_name":"Baoli Sun","orcid":"https://orcid.org/0000-0002-2861-4288"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Baoli Sun","raw_affiliation_strings":["DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100428935","display_name":"Yihan Wang","orcid":"https://orcid.org/0000-0002-5353-6250"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yihan Wang","raw_affiliation_strings":["DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030101527","display_name":"Xinzhu Ma","orcid":"https://orcid.org/0000-0003-0504-0186"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinzhu Ma","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100438119","display_name":"Zhihui Wang","orcid":"https://orcid.org/0000-0002-5011-9726"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]},{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU","CN"],"is_corresponding":false,"raw_author_name":"Zhihui Wang","raw_affiliation_strings":["DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China","The University of Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]},{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101916038","display_name":"Kun Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Lu","raw_affiliation_strings":["DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100443935","display_name":"Zhiyong Wang","orcid":"https://orcid.org/0000-0002-8043-0312"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]},{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU","CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Wang","raw_affiliation_strings":["DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China","The University of Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]},{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5020149992"],"corresponding_institution_ids":["https://openalex.org/I27357992"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2660767,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"37","issue":"1","first_page":"176","last_page":"190"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6948000192642212},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.6740000247955322},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5705000162124634},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5613999962806702},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5152999758720398},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5123000144958496},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4959999918937683},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.4957999885082245}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8109999895095825},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6948000192642212},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.6740000247955322},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6140999794006348},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5705000162124634},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5613999962806702},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5152999758720398},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5123000144958496},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4959999918937683},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.4957999885082245},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43050000071525574},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.4253999888896942},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.36980000138282776},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.35569998621940613},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.3253999948501587},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3206999897956848},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.2962000072002411},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2799000144004822},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.2720000147819519},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2567000091075897},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2025.3602089","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3602089","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:40971279","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40971279","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":80,"referenced_works":["https://openalex.org/W343636949","https://openalex.org/W1522734439","https://openalex.org/W2295107390","https://openalex.org/W2342662179","https://openalex.org/W2507009361","https://openalex.org/W2625366777","https://openalex.org/W2746726611","https://openalex.org/W2769112066","https://openalex.org/W2883429621","https://openalex.org/W2895243423","https://openalex.org/W2944006115","https://openalex.org/W2963155035","https://openalex.org/W2963273301","https://openalex.org/W2963524571","https://openalex.org/W2963820951","https://openalex.org/W2981385151","https://openalex.org/W2981923053","https://openalex.org/W2990152177","https://openalex.org/W2990503944","https://openalex.org/W3002271958","https://openalex.org/W3009812836","https://openalex.org/W3034309634","https://openalex.org/W3034572008","https://openalex.org/W3035180180","https://openalex.org/W3035225512","https://openalex.org/W3035303837","https://openalex.org/W3035413240","https://openalex.org/W3035524453","https://openalex.org/W3096609285","https://openalex.org/W3126721948","https://openalex.org/W3131500599","https://openalex.org/W3162565403","https://openalex.org/W3165924482","https://openalex.org/W3171007011","https://openalex.org/W3171516518","https://openalex.org/W3172615411","https://openalex.org/W3175528717","https://openalex.org/W3175624454","https://openalex.org/W3176780013","https://openalex.org/W3193810785","https://openalex.org/W3213518743","https://openalex.org/W3216270236","https://openalex.org/W4206337866","https://openalex.org/W4213304546","https://openalex.org/W4214612132","https://openalex.org/W4214614183","https://openalex.org/W4225414521","https://openalex.org/W4286212078","https://openalex.org/W4304080586","https://openalex.org/W4310330865","https://openalex.org/W4312257792","https://openalex.org/W4312530435","https://openalex.org/W4312560592","https://openalex.org/W4312573566","https://openalex.org/W4312614039","https://openalex.org/W4312658081","https://openalex.org/W4312872526","https://openalex.org/W4313009245","https://openalex.org/W4317829628","https://openalex.org/W4319299930","https://openalex.org/W4360993903","https://openalex.org/W4361856786","https://openalex.org/W4379382445","https://openalex.org/W4379528676","https://openalex.org/W4382239283","https://openalex.org/W4382467086","https://openalex.org/W4385245566","https://openalex.org/W4385452936","https://openalex.org/W4387055576","https://openalex.org/W4387969336","https://openalex.org/W4388854793","https://openalex.org/W4389299497","https://openalex.org/W4390871944","https://openalex.org/W4390873033","https://openalex.org/W4392827338","https://openalex.org/W4393149647","https://openalex.org/W4394593089","https://openalex.org/W4394698843","https://openalex.org/W4402753877","https://openalex.org/W4403842425"],"related_works":[],"abstract_inverted_index":{"Fine-grained":[0],"action":[1,13,115,121,152,167,255],"recognition":[2,17,256],"(FGAR)":[3],"aims":[4],"to":[5,26,53,85,140,220,269],"identify":[6,27],"subtle":[7,28],"and":[8,55,79,100,182,198,263],"distinctive":[9,60],"differences":[10],"among":[11,98,176],"fine-grained":[12],"categories.":[14],"However,":[15],"current":[16],"methods":[18],"often":[19],"capture":[20,86],"coarse-grained":[21],"motion":[22],"patterns":[23],"but":[24,242],"struggle":[25],"details":[29],"in":[30,92,131,192],"local":[31,61],"regions":[32],"evolving":[33],"over":[34],"time.":[35],"In":[36,212],"this":[37],"work,":[38],"we":[39,69,169,214],"introduce":[40],"the":[41,57,87,119,148,157,200,229,267],"action-region":[42],"tracking":[43],"(ART)":[44],"framework,":[45],"a":[46,50,71,132,171,216],"novel":[47],"solution":[48],"leveraging":[49],"query-response":[51],"mechanism":[52,219],"discover":[54],"track":[56],"dynamics":[58,122],"of":[59,151,202],"details,":[62],"enabling":[63],"distinguishing":[64],"similar":[65,203],"actions":[66],"effectively.":[67],"Specifically,":[68],"propose":[70],"region-specific":[72],"semantic":[73,144,230],"activation":[74],"module":[75],"that":[76,228],"employs":[77],"discriminative":[78],"text-constrained":[80,136],"semantics":[81,223],"serve":[82],"as":[83,154],"queries":[84,137],"most":[88],"action-related":[89],"region":[90,109,178,190,204],"responses":[91,110,126,179,191,205],"each":[93,193],"video":[94,105,129,194,208],"frame,":[95],"facilitating":[96],"interaction":[97],"spatial":[99,181],"temporal":[101,183],"dimensions":[102],"with":[103],"corresponding":[104],"features.":[106],"The":[107,135],"captured":[108],"are":[111,138,238],"then":[112],"organized":[113],"into":[114],"tracklets,":[116,168],"which":[117,185],"characterize":[118],"region-based":[120],"by":[123,156,233],"linking":[124],"related":[125],"across":[127],"different":[128],"frames":[130,209],"coherent":[133],"sequence.":[134],"designed":[139],"expressly":[141],"encode":[142],"nuanced":[143],"representations":[145,231],"derived":[146],"from":[147],"textual":[149,222],"descriptions":[150],"labels,":[153],"extracted":[155],"language":[158,162,235],"branches":[159],"within":[160],"visual":[161],"models.":[163],"To":[164],"optimize":[165],"generated":[166],"design":[170],"multilevel":[172],"tracklet":[173],"contrastive":[174],"constraint":[175],"multiple":[177],"at":[180],"levels,":[184],"can":[186],"effectively":[187],"distinguish":[188],"individual":[189],"frame":[195],"(spatial":[196],"level)":[197],"establish":[199],"correlation":[201],"between":[206],"adjacent":[207],"(temporal":[210],"level).":[211],"addition,":[213],"implement":[215],"task-specific":[217],"fine-tuning":[218],"refine":[221],"during":[224],"training.":[225],"This":[226],"ensures":[227],"encoded":[232],"vision":[234],"models":[236],"(VLMs)":[237],"not":[239],"only":[240],"preserved":[241],"also":[243],"optimized":[244],"for":[245],"specific":[246],"task":[247],"preferences.":[248],"Comprehensive":[249],"experiments":[250],"on":[251],"several":[252],"widely":[253],"used":[254],"benchmarks,":[257],"i.e.,":[258],"FineGym,":[259],"Diving48,":[260],"NTURGB-D,":[261],"Kinetics,":[262],"Something-Something,":[264],"clearly":[265],"demonstrate":[266],"superiority":[268],"previous":[270],"state-of-the-art":[271],"baselines.":[272]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
