{"id":"https://openalex.org/W4408354196","doi":"https://doi.org/10.1109/icassp49660.2025.10887950","title":"Continuously Learning Video-level Object Tokens for Robust UAV tracking","display_name":"Continuously Learning Video-level Object Tokens for Robust UAV tracking","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408354196","doi":"https://doi.org/10.1109/icassp49660.2025.10887950"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10887950","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887950","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Bin Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Bin Chen","raw_affiliation_strings":["NUIST"],"affiliations":[{"raw_affiliation_string":"NUIST","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104180468","display_name":"Shenglong Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shenglong Hu","raw_affiliation_strings":["NUIST"],"affiliations":[{"raw_affiliation_string":"NUIST","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019485232","display_name":"Gang Dong","orcid":"https://orcid.org/0000-0002-9515-6643"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gang Dong","raw_affiliation_strings":["IEIT SYSTEMS Co., Ltd"],"affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Co., Ltd","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112801316","display_name":"Lingyan Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lingyan Liang","raw_affiliation_strings":["IEIT SYSTEMS Co., Ltd"],"affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Co., Ltd","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013433372","display_name":"Dongchao Wen","orcid":"https://orcid.org/0000-0001-7311-1842"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dongchao Wen","raw_affiliation_strings":["IEIT SYSTEMS Co., Ltd"],"affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Co., Ltd","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075412755","display_name":"Kaihua Zhang","orcid":"https://orcid.org/0000-0002-1613-3401"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaihua Zhang","raw_affiliation_strings":["IEIT SYSTEMS Co., Ltd"],"affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Co., Ltd","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3088,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.77944367,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8309851884841919},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.7415400743484497},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6753877401351929},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6661853194236755},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.5459961891174316},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.505521297454834}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8309851884841919},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.7415400743484497},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6753877401351929},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6661853194236755},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.5459961891174316},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.505521297454834},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10887950","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887950","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2518876086","https://openalex.org/W2604701225","https://openalex.org/W2891033863","https://openalex.org/W2898200825","https://openalex.org/W2962766617","https://openalex.org/W2963351448","https://openalex.org/W2989604896","https://openalex.org/W3035466700","https://openalex.org/W3203510176","https://openalex.org/W4312735552","https://openalex.org/W4312751983","https://openalex.org/W4312805142","https://openalex.org/W4312897837","https://openalex.org/W4383108846","https://openalex.org/W4385364879","https://openalex.org/W4390872721","https://openalex.org/W4392909888","https://openalex.org/W4393147957","https://openalex.org/W4402704627","https://openalex.org/W4413277608","https://openalex.org/W6739901393","https://openalex.org/W6757817989","https://openalex.org/W6758734028","https://openalex.org/W6786169801","https://openalex.org/W6788135285","https://openalex.org/W6852835101","https://openalex.org/W6867310301"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W4315836309"],"abstract_inverted_index":{"Due":[0],"to":[1,57,132,150,169],"the":[2,10,36,40,44,53,90,136,146,152,156,174,191],"dynamic":[3,102],"changes":[4],"in":[5,12],"flight":[6],"motion":[7],"and":[8,43,78,128,187],"viewpoint,":[9],"objects":[11],"unmanned":[13],"aerial":[14],"vehicle":[15],"(UAV)":[16],"tracking":[17,69,165],"scenarios":[18],"often":[19,28],"suffer":[20],"from":[21,113,173],"drastic":[22,48],"appearance":[23,37,50,111],"variations.":[24],"Existing":[25],"UAV":[26,68,182],"trackers":[27],"leverage":[29],"a":[30,66,94,101,117,126,129,163],"frame-level":[31],"matching":[32],"mechanism,":[33],"which":[34,106],"measures":[35],"similarity":[38],"between":[39],"object":[41,49,81,98,110],"template":[42],"search":[45],"frame.":[46,115],"The":[47],"variations":[51],"degrade":[52],"learned":[54,153],"model,":[55],"leading":[56],"drift":[58],"issue.":[59],"To":[60],"this":[61,63],"end,":[62],"paper":[64],"presents":[65],"video-level":[67,164],"framework":[70],"that":[71,190],"focuses":[72],"on":[73,180],"Continuously":[74],"Learning":[75],"(CL)":[76],"effective":[77],"efficient":[79],"spatio-temporal":[80,97,118,142],"tokens":[82,99,138],"for":[83],"robust":[84],"tracking,":[85],"dubbed":[86],"as":[87],"CLTrack.":[88],"Specifically,":[89],"CLTrack":[91,193],"first":[92],"learns":[93],"series":[95],"of":[96,145],"via":[100,124],"filtering":[103],"module":[104,120],"(DFM),":[105],"encodes":[107,155],"more":[108],"consensus":[109],"information":[112,144,159],"each":[114],"Afterwards,":[116],"enhancement":[119],"(STEM)":[121],"is":[122,167],"designed":[123,168],"cascading":[125],"temporal":[127],"spatial":[130],"attention":[131],"fully":[133],"interact":[134],"with":[135,139],"selected":[137],"stable":[140],"long-range":[141],"context":[143,158],"tracked":[147],"object.":[148],"Finally,":[149],"ensure":[151],"model":[154],"rich":[157],"without":[160],"catastrophic":[161],"forgetting,":[162],"loss":[166],"supervise":[170],"feature":[171],"learning":[172],"whole":[175],"video":[176],"frames.":[177],"Extensive":[178],"experiments":[179],"three":[181],"benchmarks":[183],"including":[184],"UAV123,":[185],"DTB70":[186],"VisDrone2018":[188],"demonstrate":[189],"proposed":[192],"achieves":[194],"state-of-the-art":[195],"performance.":[196]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2025-10-10T00:00:00"}
