{"id":"https://openalex.org/W4415353234","doi":"https://doi.org/10.1109/tcsvt.2025.3623281","title":"State Space Models for Natural Language Tracking: Exploring Context-Adaptive Language Cues","display_name":"State Space Models for Natural Language Tracking: Exploring Context-Adaptive Language Cues","publication_year":2025,"publication_date":"2025-10-20","ids":{"openalex":"https://openalex.org/W4415353234","doi":"https://doi.org/10.1109/tcsvt.2025.3623281"},"language":null,"primary_location":{"id":"doi:10.1109/tcsvt.2025.3623281","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3623281","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100514151","display_name":"Yuyang Tang","orcid":"https://orcid.org/0000-0001-5292-2873"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuyang Tang","raw_affiliation_strings":["MoE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition and the School of Information Science, University of Science and Technology of China, Hefei, China","School of Information Science, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"MoE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition and the School of Information Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Information Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060224928","display_name":"Yinchao Ma","orcid":"https://orcid.org/0009-0003-0506-5217"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinchao Ma","raw_affiliation_strings":["MoE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition and the School of Information Science, University of Science and Technology of China, Hefei, China","School of Information Science, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"MoE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition and the School of Information Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Information Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101450608","display_name":"Dewei Yang","orcid":"https://orcid.org/0000-0002-3914-4445"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dengqing Yang","raw_affiliation_strings":["MoE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition and the School of Information Science, University of Science and Technology of China, Hefei, China","School of Information Science, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"MoE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition and the School of Information Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Information Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112305275","display_name":"Jingshan Xiao","orcid":"https://orcid.org/0000-0001-9384-8665"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Xiao","raw_affiliation_strings":["MoE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition and the School of Information Science, University of Science and Technology of China, Hefei, China","School of Information Science, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"MoE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition and the School of Information Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Information Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100648981","display_name":"Tianzhu Zhang","orcid":"https://orcid.org/0000-0003-1856-9564"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianzhu Zhang","raw_affiliation_strings":["MoE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition and the School of Information Science, University of Science and Technology of China, Hefei, China","School of Information Science, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"MoE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition and the School of Information Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Information Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100514151"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16270767,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"3","first_page":"3820","last_page":"3833"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.8148000240325928,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.8148000240325928,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.7443000078201294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7337999939918518,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.7060999870300293},{"id":"https://openalex.org/keywords/bittorrent-tracker","display_name":"BitTorrent tracker","score":0.6111000180244446},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5590000152587891},{"id":"https://openalex.org/keywords/cache-language-model","display_name":"Cache language model","score":0.4855000078678131},{"id":"https://openalex.org/keywords/natural-language-user-interface","display_name":"Natural language user interface","score":0.46549999713897705},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4629000127315521},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.46219998598098755},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.43630000948905945},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4068000018596649}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8325999975204468},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.7060999870300293},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6258000135421753},{"id":"https://openalex.org/C57501372","wikidata":"https://www.wikidata.org/wiki/Q2021268","display_name":"BitTorrent tracker","level":3,"score":0.6111000180244446},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5590000152587891},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.508400022983551},{"id":"https://openalex.org/C39608478","wikidata":"https://www.wikidata.org/wiki/Q5015979","display_name":"Cache language model","level":5,"score":0.4855000078678131},{"id":"https://openalex.org/C174252522","wikidata":"https://www.wikidata.org/wiki/Q3816772","display_name":"Natural language user interface","level":3,"score":0.46549999713897705},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4629000127315521},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.46219998598098755},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.43630000948905945},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4068000018596649},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.3986000120639801},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.39250001311302185},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.3675000071525574},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3522000014781952},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.33629998564720154},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.33149999380111694},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.3222000002861023},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.32010000944137573},{"id":"https://openalex.org/C94922259","wikidata":"https://www.wikidata.org/wiki/Q33215","display_name":"Constructed language","level":2,"score":0.31290000677108765},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3111000061035156},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.3003000020980835},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C20724563","wikidata":"https://www.wikidata.org/wiki/Q3075258","display_name":"Language primitive","level":4,"score":0.28119999170303345},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.26600000262260437},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2581000030040741},{"id":"https://openalex.org/C71611378","wikidata":"https://www.wikidata.org/wiki/Q5165191","display_name":"Contextual design","level":3,"score":0.25769999623298645},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.25529998540878296}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3623281","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3623281","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W2158592639","https://openalex.org/W2747053578","https://openalex.org/W2891033863","https://openalex.org/W2896457183","https://openalex.org/W2962766617","https://openalex.org/W2963109634","https://openalex.org/W2965373594","https://openalex.org/W3010072143","https://openalex.org/W3012573144","https://openalex.org/W3017266435","https://openalex.org/W3090155371","https://openalex.org/W3106127916","https://openalex.org/W3106542916","https://openalex.org/W3109466111","https://openalex.org/W3167536469","https://openalex.org/W3173871266","https://openalex.org/W3181069167","https://openalex.org/W3214586131","https://openalex.org/W4292828074","https://openalex.org/W4309181071","https://openalex.org/W4312751983","https://openalex.org/W4312796324","https://openalex.org/W4313145013","https://openalex.org/W4313156423","https://openalex.org/W4376133601","https://openalex.org/W4376851307","https://openalex.org/W4380520387","https://openalex.org/W4381735302","https://openalex.org/W4385245566","https://openalex.org/W4385569741","https://openalex.org/W4386066459","https://openalex.org/W4386075643","https://openalex.org/W4387969322","https://openalex.org/W4387986753","https://openalex.org/W4390872215","https://openalex.org/W4393159404","https://openalex.org/W4396505919","https://openalex.org/W4400944606","https://openalex.org/W4402704627","https://openalex.org/W4402753915","https://openalex.org/W4403842425","https://openalex.org/W4403998622","https://openalex.org/W4406754764","https://openalex.org/W4409128969","https://openalex.org/W4409152826","https://openalex.org/W4409346692","https://openalex.org/W4409368509","https://openalex.org/W4409683198","https://openalex.org/W4412444687","https://openalex.org/W4415798002","https://openalex.org/W4415798615"],"related_works":[],"abstract_inverted_index":{"Natural":[0],"language":[1,14,22,39,102,130,134,140],"tracking":[2,173],"aims":[3],"to":[4,28,45,104,109,123],"locate":[5],"the":[6,21,30,72,75,119,137,145,149,153,157],"target":[7,31,48,127,150,158],"of":[8,74,148],"a":[9,13,42,81,94,167],"video":[10,24],"based":[11],"on":[12],"description.":[15],"The":[16],"rich":[17],"contextual":[18,126],"information":[19,55,128],"inside":[20],"and":[23,33,56,66,151,172],"sequence":[25],"is":[26],"essential":[27],"describe":[29],"movements":[32],"appearance":[34],"variations.":[35],"However,":[36],"existing":[37],"natural":[38],"trackers":[40],"design":[41],"fixed-length":[43],"memory":[44],"store":[46],"historical":[47],"information,":[49],"which":[50],"merely":[51],"uses":[52],"limited":[53],"context":[54],"necessitates":[57],"manually":[58],"designed":[59],"modules,":[60],"resulting":[61],"in":[62,155],"sub-optimal":[63],"localization":[64],"performance":[65],"numerous":[67],"computational":[68],"costs.":[69],"Inspired":[70],"by":[71],"success":[73],"state":[76,97],"space":[77,98],"model,":[78],"we":[79,92],"propose":[80,93],"novel":[82,95],"Context-adaptive":[83],"Mamba":[84],"Tracker":[85],"(CMTrack).":[86],"It":[87],"enjoys":[88],"several":[89],"merits.":[90],"First,":[91],"context-aware":[96],"model":[99],"that":[100,180],"enables":[101],"features":[103,114],"serve":[105],"as":[106],"hidden":[107,120],"states":[108,121],"interact":[110],"with":[111,174],"relevant":[112],"image":[113],"adaptively.":[115],"Second,":[116],"CMTrack":[117,165,181],"transfers":[118],"frame-by-frame":[122],"continuously":[124],"incorporate":[125],"into":[129],"features,":[131],"enabling":[132],"context-adaptive":[133,139],"cues.":[135],"Third,":[136],"proposed":[138],"cues":[141],"can":[142],"effectively":[143],"capture":[144],"long-range":[146],"behavior":[147],"guide":[152],"tracker":[154],"locating":[156],"accurately":[159],"without":[160],"any":[161],"extra":[162],"design.":[163],"Finally,":[164],"provides":[166],"neat":[168],"pipeline":[169],"for":[170],"training":[171],"linear":[175],"complexity.":[176],"Experimental":[177],"results":[178],"demonstrate":[179],"achieves":[182],"new":[183],"state-of-the-art":[184],"performance.":[185]},"counts_by_year":[],"updated_date":"2026-03-09T07:00:12.390032","created_date":"2025-10-21T00:00:00"}
