{"id":"https://openalex.org/W4399344486","doi":"https://doi.org/10.1109/tpami.2024.3409078","title":"Divert More Attention to Vision-Language Object Tracking","display_name":"Divert More Attention to Vision-Language Object Tracking","publication_year":2024,"publication_date":"2024-06-04","ids":{"openalex":"https://openalex.org/W4399344486","doi":"https://doi.org/10.1109/tpami.2024.3409078","pmid":"https://pubmed.ncbi.nlm.nih.gov/38833398"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2024.3409078","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3409078","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115444828","display_name":"Mingzhe Guo","orcid":"https://orcid.org/0000-0001-6399-9753"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingzhe Guo","raw_affiliation_strings":["Beijing Key Lab of Traffic Data Analysis and Mining, Beijing Jiaotong University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-6399-9753","affiliations":[{"raw_affiliation_string":"Beijing Key Lab of Traffic Data Analysis and Mining, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100410132","display_name":"Zhipeng Zhang","orcid":"https://orcid.org/0000-0003-0479-332X"},"institutions":[{"id":"https://openalex.org/I4401726870","display_name":"Didi Chuxing (China)","ror":"https://ror.org/02ksqcf75","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726870"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhipeng Zhang","raw_affiliation_strings":["KargoBot, Beijing, China","DiDi Chuxing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-0479-332X","affiliations":[{"raw_affiliation_string":"KargoBot, Beijing, China","institution_ids":[]},{"raw_affiliation_string":"DiDi Chuxing, Beijing, China","institution_ids":["https://openalex.org/I4401726870"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069749738","display_name":"Liping Jing","orcid":"https://orcid.org/0000-0001-7578-3407"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liping Jing","raw_affiliation_strings":["Beijing Key Lab of Traffic Data Analysis and Mining, Beijing Jiaotong University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7578-3407","affiliations":[{"raw_affiliation_string":"Beijing Key Lab of Traffic Data Analysis and Mining, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061469520","display_name":"Haibin Ling","orcid":"https://orcid.org/0000-0003-4094-8413"},"institutions":[{"id":"https://openalex.org/I59553526","display_name":"Stony Brook University","ror":"https://ror.org/05qghxh33","country_code":"US","type":"education","lineage":["https://openalex.org/I59553526"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haibin Ling","raw_affiliation_strings":["Department of Computer Science, Stony Brook University, Stony Brook, NY, USA"],"raw_orcid":"https://orcid.org/0000-0003-4094-8413","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Stony Brook University, Stony Brook, NY, USA","institution_ids":["https://openalex.org/I59553526"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047220188","display_name":"Heng Fan","orcid":"https://orcid.org/0000-0002-7033-3690"},"institutions":[{"id":"https://openalex.org/I123534392","display_name":"University of North Texas","ror":"https://ror.org/00v97ad02","country_code":"US","type":"education","lineage":["https://openalex.org/I123534392"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heng Fan","raw_affiliation_strings":["Department of Computer Science and Engineering, University of North Texas, Denton, TX, USA"],"raw_orcid":"https://orcid.org/0000-0002-7033-3690","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of North Texas, Denton, TX, USA","institution_ids":["https://openalex.org/I123534392"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5115444828"],"corresponding_institution_ids":["https://openalex.org/I21193070"],"apc_list":null,"apc_paid":null,"fwci":2.9925,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.92371589,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"46","issue":"12","first_page":"8600","last_page":"8618"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.886900007724762,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.886900007724762,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.8683000206947327,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13277","display_name":"Media, Religion, Digital Communication","score":0.8432999849319458,"subfield":{"id":"https://openalex.org/subfields/1211","display_name":"Philosophy"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.800283670425415},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6276024580001831},{"id":"https://openalex.org/keywords/eye-tracking","display_name":"Eye tracking","score":0.534623920917511},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.5211705565452576},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4871322810649872},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4313647747039795},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.42032191157341003},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36706751585006714},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.28986263275146484}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.800283670425415},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6276024580001831},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.534623920917511},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.5211705565452576},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4871322810649872},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4313647747039795},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42032191157341003},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36706751585006714},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.28986263275146484},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2024.3409078","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3409078","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:38833398","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38833398","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4118857521","display_name":null,"funder_award_id":"62176020","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8377820436","display_name":null,"funder_award_id":"2019JBZ110","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G8689330525","display_name":null,"funder_award_id":"L211016","funder_id":"https://openalex.org/F4320322919","funder_display_name":"Natural Science Foundation of Beijing Municipality"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322919","display_name":"Natural Science Foundation of Beijing Municipality","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":89,"referenced_works":["https://openalex.org/W345900524","https://openalex.org/W1861492603","https://openalex.org/W1964846093","https://openalex.org/W2108598243","https://openalex.org/W2154889144","https://openalex.org/W2158592639","https://openalex.org/W2194775991","https://openalex.org/W2408241409","https://openalex.org/W2470394683","https://openalex.org/W2557641257","https://openalex.org/W2579923771","https://openalex.org/W2592463526","https://openalex.org/W2745461083","https://openalex.org/W2747053578","https://openalex.org/W2794744029","https://openalex.org/W2799058067","https://openalex.org/W2891033863","https://openalex.org/W2896006880","https://openalex.org/W2896457183","https://openalex.org/W2898200825","https://openalex.org/W2955747520","https://openalex.org/W2962858109","https://openalex.org/W2963109634","https://openalex.org/W2963125010","https://openalex.org/W2963150697","https://openalex.org/W2963350032","https://openalex.org/W2963383024","https://openalex.org/W2963534981","https://openalex.org/W2964423614","https://openalex.org/W2965658867","https://openalex.org/W2966759264","https://openalex.org/W3010072143","https://openalex.org/W3035276082","https://openalex.org/W3035453691","https://openalex.org/W3035511673","https://openalex.org/W3035590142","https://openalex.org/W3035672751","https://openalex.org/W3090155371","https://openalex.org/W3096533519","https://openalex.org/W3108235634","https://openalex.org/W3108519869","https://openalex.org/W3132455321","https://openalex.org/W3159196909","https://openalex.org/W3159619744","https://openalex.org/W3159649695","https://openalex.org/W3167536469","https://openalex.org/W3173344965","https://openalex.org/W3173871266","https://openalex.org/W3175126073","https://openalex.org/W3181069167","https://openalex.org/W3181414820","https://openalex.org/W3197481605","https://openalex.org/W3204554907","https://openalex.org/W3214586131","https://openalex.org/W4200631575","https://openalex.org/W4214759957","https://openalex.org/W4229042118","https://openalex.org/W4292787094","https://openalex.org/W4292828275","https://openalex.org/W4307106676","https://openalex.org/W4312751983","https://openalex.org/W4312805142","https://openalex.org/W4312956471","https://openalex.org/W4377164404","https://openalex.org/W4385245566","https://openalex.org/W4386065544","https://openalex.org/W4386066081","https://openalex.org/W4386066394","https://openalex.org/W4386075643","https://openalex.org/W4390874575","https://openalex.org/W4399344486","https://openalex.org/W6680922216","https://openalex.org/W6729956949","https://openalex.org/W6732517885","https://openalex.org/W6736942654","https://openalex.org/W6752083267","https://openalex.org/W6752515464","https://openalex.org/W6755207826","https://openalex.org/W6755526309","https://openalex.org/W6767327189","https://openalex.org/W6771571646","https://openalex.org/W6779520018","https://openalex.org/W6791353385","https://openalex.org/W6796149595","https://openalex.org/W6803537622","https://openalex.org/W6803567076","https://openalex.org/W6805147364","https://openalex.org/W6811072154","https://openalex.org/W6852999659"],"related_works":["https://openalex.org/W2062195135","https://openalex.org/W2807619505","https://openalex.org/W2795079307","https://openalex.org/W2793058541","https://openalex.org/W1983629434","https://openalex.org/W2055929693","https://openalex.org/W4324271173","https://openalex.org/W2554330399","https://openalex.org/W1967645776","https://openalex.org/W2352227742"],"abstract_inverted_index":{"Multimodal":[0],"vision-language":[1,45,50,64,103],"(VL)":[2],"learning":[3,52,121],"has":[4],"noticeably":[5],"pushed":[6],"the":[7,37,41,127,130,156,163,174,181,189,225,232],"tendency":[8],"toward":[9],"generic":[10],"intelligence":[11],"owing":[12],"to":[13,60,91,117,149,219,234,238,243],"emerging":[14],"large":[15,71],"foundation":[16],"models.":[17],"However,":[18],"tracking,":[19,67],"as":[20],"a":[21,70,86,101,114,122,146],"fundamental":[22],"vision":[23],"problem,":[24],"surprisingly":[25],"enjoys":[26],"less":[27],"bonus":[28],"from":[29],"recent":[30],"flourishing":[31],"VL":[32,124,142,228,239],"learning.":[33,78],"We":[34,111],"argue":[35],"that":[36,200],"reasons":[38],"are":[39,129],"two-fold:":[40],"lack":[42],"of":[43,53,158,227],"large-scale":[44,102],"annotated":[46],"videos":[47,93],"and":[48,135,188,241],"ineffective":[49],"interaction":[51],"current":[54],"works.":[55],"These":[56],"nuisances":[57],"motivate":[58],"us":[59],"design":[61],"more":[62,107,236,245],"effective":[63],"representation":[65],"for":[66,76,247],"meanwhile":[68],"constructing":[69],"database":[72,105],"with":[73,106,170,250],"language":[74],"annotation":[75,89],"model":[77],"Particularly,":[79],"in":[80,94],"this":[81],"paper,":[82],"we":[83,144,161,214,230],"first":[84],"propose":[85],"general":[87],"attribute":[88],"strategy":[90],"decorate":[92],"six":[95,209],"popular":[96],"tracking":[97,104,119,168,240,249],"benchmarks,":[98],"which":[99],"contributes":[100],"than":[108],"23,000":[109],"videos.":[110],"then":[112],"introduce":[113,145],"novel":[115],"framework":[116,165,202],"improve":[118,141,205],"by":[120],"unified-adaptive":[123],"representation,":[125,143,229],"where":[126],"cores":[128],"proposed":[131,164],"asymmetric":[132],"architecture":[133],"search":[134],"modality":[136],"mixer":[137],"(ModaMixer).":[138],"To":[139,153],"further":[140],"contrastive":[147],"loss":[148],"align":[150],"different":[151,171],"modalities.":[152],"thoroughly":[154],"evidence":[155],"effectiveness":[157],"our":[159,201,217],"method,":[160],"integrate":[162],"on":[166,208],"three":[167],"methods":[169],"designs,":[172],"i.e.,":[173],"CNN-based":[175],"SiamCAR":[176],"(Guo":[177],"et":[178,185,194],"al.":[179,186,195],"2020),":[180],"Transformer-based":[182],"OSTrack":[183],"(Ye":[184],"2022),":[187],"hybrid":[190],"structure":[191],"TransT":[192],"(Chen":[193],"2021).":[196],"The":[197],"experiments":[198],"demonstrate":[199],"can":[203],"significantly":[204],"all":[206],"baselines":[207],"benchmarks.":[210],"Besides":[211],"empirical":[212],"results,":[213],"theoretically":[215],"analyze":[216],"approach":[218],"show":[220],"its":[221],"rationality.":[222],"By":[223],"revealing":[224],"potential":[226],"expect":[231],"community":[233],"divert":[235],"attention":[237],"hope":[242],"open":[244],"possibilities":[246],"future":[248],"diversified":[251],"multimodal":[252],"messages.":[253]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
