{"id":"https://openalex.org/W7133309892","doi":"https://doi.org/10.48550/arxiv.2603.01412","title":"UETrack: A Unified and Efficient Framework for Single Object Tracking","display_name":"UETrack: A Unified and Efficient Framework for Single Object Tracking","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133309892","doi":"https://doi.org/10.48550/arxiv.2603.01412"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.01412","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01412","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.01412","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127935610","display_name":"Ben Kang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kang, Ben","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127948287","display_name":"Jie Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127933316","display_name":"Xin Chen (14149)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102590430","display_name":"Wanting Geng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Geng, Wanting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127907379","display_name":"Bin Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Bin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127968941","display_name":"Lu Zhang (50563)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Lu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127989535","display_name":"Dong Hwan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Dong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127929418","display_name":"Huchuan Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Huchuan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5127935610"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.00279999990016222,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.0008999999845400453,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6092000007629395},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5098999738693237},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.5031999945640564},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.47519999742507935},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.4528999924659729},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.448199987411499},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.412200003862381},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.35910001397132874}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7720000147819519},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6092000007629395},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5098999738693237},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5049999952316284},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.5031999945640564},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.47519999742507935},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.4528999924659729},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.448199987411499},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.412200003862381},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37940001487731934},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3474999964237213},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.32280001044273376},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.32120001316070557},{"id":"https://openalex.org/C154586513","wikidata":"https://www.wikidata.org/wiki/Q4420972","display_name":"Tracking system","level":3,"score":0.3188999891281128},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31859999895095825},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.28929999470710754},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.27810001373291016},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.27649998664855957},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.01412","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01412","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.01412","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01412","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.43706774711608887,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"growing":[1],"real-world":[2],"demands,":[3],"efficient":[4,50,78],"tracking":[5,27],"has":[6],"received":[7],"increasing":[8],"attention.":[9],"However,":[10],"most":[11],"existing":[12],"methods":[13],"are":[14],"limited":[15],"to":[16,138],"RGB":[17],"inputs":[18],"and":[19,37,60,71,73,97,100,117,149,159],"struggle":[20],"in":[21,77],"multi-modal":[22,26,79],"scenarios.":[23],"Moreover,":[24],"current":[25],"approaches":[28],"typically":[29],"use":[30],"complex":[31],"designs,":[32],"making":[33],"them":[34],"too":[35],"heavy":[36],"slow":[38],"for":[39,52],"resource-constrained":[40],"deployment.":[41],"To":[42],"tackle":[43],"these":[44],"limitations,":[45],"we":[46],"propose":[47],"UETrack,":[48],"an":[49],"framework":[51],"single":[53],"object":[54],"tracking.":[55,80],"UETrack":[56,131],"demonstrates":[57],"high":[58],"practicality":[59,158],"versatility,":[61],"efficiently":[62],"handling":[63],"multiple":[64],"modalities":[65],"including":[66],"RGB,":[67],"Depth,":[68],"Thermal,":[69],"Event,":[70],"Language,":[72],"addresses":[74],"the":[75],"gap":[76],"It":[81],"introduces":[82],"two":[83],"key":[84],"components:":[85],"a":[86,101,133],"Token-Pooling-based":[87],"Mixture-of-Experts":[88],"mechanism":[89],"that":[90,106,130],"enhances":[91],"modeling":[92],"capacity":[93],"through":[94],"feature":[95],"aggregation":[96],"expert":[98],"specialization,":[99],"Target-aware":[102],"Adaptive":[103],"Distillation":[104],"strategy":[105],"selectively":[107],"performs":[108],"distillation":[109],"based":[110],"on":[111,122,147,154],"sample":[112],"characteristics,":[113],"reducing":[114],"redundant":[115],"supervision":[116],"improving":[118],"performance.":[119],"Extensive":[120],"experiments":[121],"12":[123],"benchmarks":[124],"across":[125],"3":[126],"hardware":[127],"platforms":[128],"show":[129],"achieves":[132,144],"superior":[134],"speed-accuracy":[135],"trade-off":[136],"compared":[137],"previous":[139],"methods.":[140],"For":[141],"instance,":[142],"UETrack-B":[143],"69.2%":[145],"AUC":[146],"LaSOT":[148],"runs":[150],"at":[151,164],"163/56/60":[152],"FPS":[153],"GPU/CPU/AGX,":[155],"demonstrating":[156],"strong":[157],"versatility.":[160],"Code":[161],"is":[162],"available":[163],"https://github.com/kangben258/UETrack.":[165]},"counts_by_year":[],"updated_date":"2026-03-04T07:09:34.246503","created_date":"2026-03-04T00:00:00"}
