{"id":"https://openalex.org/W4385569741","doi":"https://doi.org/10.1109/tcsvt.2023.3301933","title":"Toward Unified Token Learning for Vision-Language Tracking","display_name":"Toward Unified Token Learning for Vision-Language Tracking","publication_year":2023,"publication_date":"2023-08-04","ids":{"openalex":"https://openalex.org/W4385569741","doi":"https://doi.org/10.1109/tcsvt.2023.3301933"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2023.3301933","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3301933","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065943384","display_name":"Yaozong Zheng","orcid":"https://orcid.org/0009-0007-2664-0574"},"institutions":[{"id":"https://openalex.org/I29739308","display_name":"Guangxi Normal University","ror":"https://ror.org/02frt9q65","country_code":"CN","type":"education","lineage":["https://openalex.org/I29739308"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yaozong Zheng","raw_affiliation_strings":["Key Laboratory of Education Blockchain and Intelligent Technology, Ministry of Education, and the Guangxi Key Laboratory of Multi-Source Information Mining and Security, Guangxi Normal University, Guilin, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Education Blockchain and Intelligent Technology, Ministry of Education, and the Guangxi Key Laboratory of Multi-Source Information Mining and Security, Guangxi Normal University, Guilin, China","institution_ids":["https://openalex.org/I29739308"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058101262","display_name":"Bineng Zhong","orcid":"https://orcid.org/0000-0003-3423-1539"},"institutions":[{"id":"https://openalex.org/I29739308","display_name":"Guangxi Normal University","ror":"https://ror.org/02frt9q65","country_code":"CN","type":"education","lineage":["https://openalex.org/I29739308"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bineng Zhong","raw_affiliation_strings":["Key Laboratory of Education Blockchain and Intelligent Technology, Ministry of Education, and the Guangxi Key Laboratory of Multi-Source Information Mining and Security, Guangxi Normal University, Guilin, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Education Blockchain and Intelligent Technology, Ministry of Education, and the Guangxi Key Laboratory of Multi-Source Information Mining and Security, Guangxi Normal University, Guilin, China","institution_ids":["https://openalex.org/I29739308"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049287545","display_name":"Qihua Liang","orcid":"https://orcid.org/0000-0003-2353-5246"},"institutions":[{"id":"https://openalex.org/I29739308","display_name":"Guangxi Normal University","ror":"https://ror.org/02frt9q65","country_code":"CN","type":"education","lineage":["https://openalex.org/I29739308"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qihua Liang","raw_affiliation_strings":["Key Laboratory of Education Blockchain and Intelligent Technology, Ministry of Education, and the Guangxi Key Laboratory of Multi-Source Information Mining and Security, Guangxi Normal University, Guilin, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Education Blockchain and Intelligent Technology, Ministry of Education, and the Guangxi Key Laboratory of Multi-Source Information Mining and Security, Guangxi Normal University, Guilin, China","institution_ids":["https://openalex.org/I29739308"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100775216","display_name":"Guorong Li","orcid":"https://orcid.org/0000-0003-3954-2387"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guorong Li","raw_affiliation_strings":["Key Laboratory of Big Data Mining and Knowledge Management, School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Big Data Mining and Knowledge Management, School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016080094","display_name":"Rongrong Ji","orcid":"https://orcid.org/0000-0001-9163-2932"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongrong Ji","raw_affiliation_strings":["Media Analytics and Computing Laboratory, Department of Artificial Intelligence, School of Informatics, Xiamen University, Xiamen, China"],"affiliations":[{"raw_affiliation_string":"Media Analytics and Computing Laboratory, Department of Artificial Intelligence, School of Informatics, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037037839","display_name":"Xianxian Li","orcid":"https://orcid.org/0000-0002-7083-3847"},"institutions":[{"id":"https://openalex.org/I29739308","display_name":"Guangxi Normal University","ror":"https://ror.org/02frt9q65","country_code":"CN","type":"education","lineage":["https://openalex.org/I29739308"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianxian Li","raw_affiliation_strings":["Key Laboratory of Education Blockchain and Intelligent Technology, Ministry of Education, and the Guangxi Key Laboratory of Multi-Source Information Mining and Security, Guangxi Normal University, Guilin, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Education Blockchain and Intelligent Technology, Ministry of Education, and the Guangxi Key Laboratory of Multi-Source Information Mining and Security, Guangxi Normal University, Guilin, China","institution_ids":["https://openalex.org/I29739308"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5065943384"],"corresponding_institution_ids":["https://openalex.org/I29739308"],"apc_list":null,"apc_paid":null,"fwci":7.6357,"has_fulltext":false,"cited_by_count":64,"citation_normalized_percentile":{"value":0.98218713,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"34","issue":"4","first_page":"2125","last_page":"2135"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.8266779184341431},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7479267716407776},{"id":"https://openalex.org/keywords/minimum-bounding-box","display_name":"Minimum bounding box","score":0.5951871871948242},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.507004976272583},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.4945544898509979},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4744721055030823},{"id":"https://openalex.org/keywords/notation","display_name":"Notation","score":0.4590306580066681},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.43694815039634705},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4268733263015747},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.42634284496307373},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3742918372154236},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.1441059112548828},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1284356713294983},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.10819077491760254}],"concepts":[{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.8266779184341431},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7479267716407776},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.5951871871948242},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.507004976272583},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.4945544898509979},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4744721055030823},{"id":"https://openalex.org/C45357846","wikidata":"https://www.wikidata.org/wiki/Q2001982","display_name":"Notation","level":2,"score":0.4590306580066681},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.43694815039634705},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4268733263015747},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.42634284496307373},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3742918372154236},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.1441059112548828},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1284356713294983},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10819077491760254},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2023.3301933","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3301933","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7400000095367432}],"awards":[{"id":"https://openalex.org/G3295794154","display_name":null,"funder_award_id":"61972167","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G500372149","display_name":null,"funder_award_id":"U21A20474","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5529942192","display_name":null,"funder_award_id":"2022TD002","funder_id":"https://openalex.org/F4320325559","funder_display_name":"Guangxi Normal University"},{"id":"https://openalex.org/G7695376638","display_name":null,"funder_award_id":"2022GXNSFDA035079","funder_id":"https://openalex.org/F4320335965","funder_display_name":"Science and Technology Major Project of Guangxi"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320325559","display_name":"Guangxi Normal University","ror":"https://ror.org/02frt9q65"},{"id":"https://openalex.org/F4320335965","display_name":"Science and Technology Major Project of Guangxi","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W2096733369","https://openalex.org/W2097117768","https://openalex.org/W2158592639","https://openalex.org/W2194775991","https://openalex.org/W2470394683","https://openalex.org/W2747053578","https://openalex.org/W2799058067","https://openalex.org/W2900474539","https://openalex.org/W2962766617","https://openalex.org/W2963109634","https://openalex.org/W2963351448","https://openalex.org/W2963534981","https://openalex.org/W2965373594","https://openalex.org/W2993231436","https://openalex.org/W3001584168","https://openalex.org/W3010072143","https://openalex.org/W3035571898","https://openalex.org/W3090155371","https://openalex.org/W3096609285","https://openalex.org/W3106542916","https://openalex.org/W3108519869","https://openalex.org/W3143320354","https://openalex.org/W3159619744","https://openalex.org/W3173871266","https://openalex.org/W3175823695","https://openalex.org/W3181069167","https://openalex.org/W3190916078","https://openalex.org/W3204540098","https://openalex.org/W3214586131","https://openalex.org/W4214493665","https://openalex.org/W4214759957","https://openalex.org/W4285600988","https://openalex.org/W4292828074","https://openalex.org/W4297456994","https://openalex.org/W4312255167","https://openalex.org/W4312323989","https://openalex.org/W4312532041","https://openalex.org/W4312751983","https://openalex.org/W4312796324","https://openalex.org/W4312805142","https://openalex.org/W4312961003","https://openalex.org/W4313036965","https://openalex.org/W4313068342","https://openalex.org/W4321766229","https://openalex.org/W4386066081","https://openalex.org/W4386075643","https://openalex.org/W6756015008","https://openalex.org/W6757817989","https://openalex.org/W6771096273","https://openalex.org/W6778883912","https://openalex.org/W6791353385","https://openalex.org/W6801512601","https://openalex.org/W6839015040","https://openalex.org/W6839144149"],"related_works":["https://openalex.org/W4237171675","https://openalex.org/W3036286480","https://openalex.org/W4287027631","https://openalex.org/W3192357901","https://openalex.org/W2387360586","https://openalex.org/W2952736415","https://openalex.org/W3209723314","https://openalex.org/W3205398323","https://openalex.org/W2883297582","https://openalex.org/W4390524233"],"abstract_inverted_index":{"In":[0,47,64],"this":[1,65],"paper,":[2],"we":[3],"present":[4],"a":[5,21,59,119],"simple,":[6],"flexible":[7],"and":[8,55,79,101,113,143],"effective":[9],"vision-language":[10],"(VL)":[11],"tracking":[12,19,29,111,129],"pipeline,":[13],"termed":[14],"MMTrack,":[15],"which":[16],"casts":[17],"VL":[18,28,110,128],"as":[20,123],"token":[22,70],"generation":[23],"task.":[24,130],"Traditional":[25],"paradigms":[26],"address":[27],"task":[30],"indirectly":[31],"with":[32],"sophisticated":[33],"prior":[34,95],"designs,":[35],"making":[36],"them":[37],"over-specialize":[38],"on":[39,133],"the":[40,76,85,107],"features":[41],"of":[42,61,84,109],"specific":[43],"architectures":[44],"or":[45],"mechanisms.":[46],"contrast,":[48],"our":[49,115,148],"proposed":[50],"framework":[51],"serializes":[52],"language":[53],"description":[54],"bounding":[56],"box":[57],"into":[58],"sequence":[60],"discrete":[62],"tokens.":[63],"new":[66],"design":[67,92],"paradigm,":[68],"all":[69],"queries":[71],"are":[72],"required":[73],"to":[74,117,154],"perceive":[75],"desired":[77],"target":[78,86],"directly":[80],"predict":[81],"spatial":[82],"coordinates":[83],"in":[87],"an":[88],"auto-regressive":[89],"manner.":[90],"The":[91],"without":[93],"other":[94,155],"modules":[96],"avoids":[97],"multiple":[98],"sub-tasks":[99],"learning":[100],"hand-designed":[102],"loss":[103,122],"functions,":[104],"significantly":[105],"reducing":[106],"complexity":[108],"modeling":[112],"allowing":[114],"tracker":[116],"use":[118],"simple":[120],"cross-entropy":[121],"unified":[124],"optimization":[125],"objective":[126],"for":[127],"Extensive":[131],"experiments":[132],"TNL2K,":[134],"LaSOT,":[135],"LaSOT":[136],"<inline-formula":[137],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[138],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[139],"<tex-math":[140],"notation=\"LaTeX\">$_{\\mathrm{ext}}$":[141],"</tex-math></inline-formula>":[142],"OTB99-Lang":[144],"benchmarks":[145],"show":[146],"that":[147],"approach":[149],"achieves":[150],"promising":[151],"results,":[152],"compared":[153],"state-of-the-arts.":[156]},"counts_by_year":[{"year":2026,"cited_by_count":11},{"year":2025,"cited_by_count":37},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-11T08:14:18.477133","created_date":"2025-10-10T00:00:00"}
