{"id":"https://openalex.org/W4403791254","doi":"https://doi.org/10.1145/3664647.3680657","title":"Consistencies are All You Need for Semi-supervised Vision-Language Tracking","display_name":"Consistencies are All You Need for Semi-supervised Vision-Language Tracking","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791254","doi":"https://doi.org/10.1145/3664647.3680657"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3680657","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680657","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018527272","display_name":"Jiawei Ge","orcid":"https://orcid.org/0000-0001-7268-7815"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiawei Ge","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012384188","display_name":"Jiuxin Cao","orcid":"https://orcid.org/0000-0002-2448-6717"},"institutions":[{"id":"https://openalex.org/I4210155350","display_name":"Purple Mountain Laboratories","ror":"https://ror.org/04zcbk583","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210155350"]},{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiuxin Cao","raw_affiliation_strings":["Southeast University &amp; Purple Mountain Laboratories, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University &amp; Purple Mountain Laboratories, Nanjing, China","institution_ids":["https://openalex.org/I4210155350","https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076214561","display_name":"Xuelin Zhu","orcid":"https://orcid.org/0000-0001-7676-2843"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuelin Zhu","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100769571","display_name":"Xinyu Zhang","orcid":"https://orcid.org/0000-0002-2838-1445"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyu Zhang","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100353385","display_name":"Chang Liu","orcid":"https://orcid.org/0000-0001-5365-8787"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang Liu","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042411401","display_name":"Kun Wang","orcid":"https://orcid.org/0000-0002-6735-7667"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Wang","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100461575","display_name":"Bo Liu","orcid":"https://orcid.org/0000-0001-5209-9063"},"institutions":[{"id":"https://openalex.org/I4210155350","display_name":"Purple Mountain Laboratories","ror":"https://ror.org/04zcbk583","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210155350"]},{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Liu","raw_affiliation_strings":["Southeast University &amp; Purple Mountain Laboratories, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University &amp; Purple Mountain Laboratories, Nanjing, China","institution_ids":["https://openalex.org/I4210155350","https://openalex.org/I76569877"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5018527272"],"corresponding_institution_ids":["https://openalex.org/I76569877"],"apc_list":null,"apc_paid":null,"fwci":2.4874,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.90748996,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1895","last_page":"1904"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7156425714492798},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6788533926010132},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6332904100418091},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.5161831974983215},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3825991153717041},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.09722450375556946}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7156425714492798},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6788533926010132},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6332904100418091},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.5161831974983215},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3825991153717041},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09722450375556946},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3680657","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680657","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1917989004","https://openalex.org/W2154889144","https://openalex.org/W2277195237","https://openalex.org/W2470394683","https://openalex.org/W2557641257","https://openalex.org/W2745461083","https://openalex.org/W2747053578","https://openalex.org/W2794744029","https://openalex.org/W2891033863","https://openalex.org/W2927438889","https://openalex.org/W2946245424","https://openalex.org/W2989688045","https://openalex.org/W3010072143","https://openalex.org/W3035497460","https://openalex.org/W3088021439","https://openalex.org/W3090449556","https://openalex.org/W3093022272","https://openalex.org/W3106542916","https://openalex.org/W3136789123","https://openalex.org/W3138516171","https://openalex.org/W3142849873","https://openalex.org/W3171581326","https://openalex.org/W3173871266","https://openalex.org/W3176187859","https://openalex.org/W3176709420","https://openalex.org/W3181069167","https://openalex.org/W3196204376","https://openalex.org/W3204554907","https://openalex.org/W3205239873","https://openalex.org/W3214586131","https://openalex.org/W4214710672","https://openalex.org/W4226058394","https://openalex.org/W4234552385","https://openalex.org/W4292828074","https://openalex.org/W4297792979","https://openalex.org/W4304080876","https://openalex.org/W4312956471","https://openalex.org/W4312960937","https://openalex.org/W4386071798","https://openalex.org/W4386075643","https://openalex.org/W4389665280","https://openalex.org/W4390874143"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Vision-Language":[0,245],"Tracking":[1,246],"(VLT)":[2],"requires":[3],"locating":[4],"a":[5,12,48,94,115,179,216],"specific":[6],"target":[7],"in":[8,70,196],"video":[9,116],"sequences,":[10],"given":[11],"natural":[13,112],"language":[14],"prompt":[15],"and":[16,30,120,135,165,239],"an":[17,65,143,197],"initial":[18],"object":[19,137],"box.":[20],"Despite":[21],"recent":[22],"advancements,":[23],"existing":[24],"approaches":[25],"heavily":[26],"rely":[27],"on":[28,101],"expensive":[29],"time-consuming":[31],"human":[32],"annotations.":[33],"To":[34],"mitigate":[35],"this":[36,233],"limitation,":[37],"directly":[38],"generating":[39],"pseudo":[40],"labels":[41],"from":[42,110,128,139],"raw":[43],"videos":[44],"seems":[45],"to":[46,170,201,236],"be":[47],"straightforward":[49],"solution;":[50],"however,":[51],"it":[52,140],"inevitably":[53],"introduces":[54],"undesirable":[55],"noise":[56],"during":[57],"the":[58,72,76,85,99,111,161,174,193,226,241],"training":[59],"process.":[60],"Moreover,":[61],"we":[62,83,184],"insist":[63],"that":[64,211],"efficient":[66],"tracker":[67,183],"should":[68,155],"excel":[69],"tracking":[71,153],"target,":[73],"regardless":[74],"of":[75,114,228],"temporal":[77],"direction.":[78],"Building":[79],"upon":[80],"these":[81,129],"insights,":[82],"propose":[84],"pioneering":[86],"semi-supervised":[87],"learning":[88,229],"scheme":[89],"for":[90,148,243],"VLT":[91],"task,":[92],"representing":[93],"crucial":[95],"step":[96],"towards":[97],"reducing":[98],"dependency":[100],"high-quality":[102],"yet":[103,181],"costly":[104],"labeled":[105],"data.":[106],"Specifically,":[107],"drawing":[108],"inspiration":[109],"attributes":[113],"(i.e.,":[117],"space,":[118],"time,":[119],"semantics),":[121],"our":[122,212],"approach":[123],"progressively":[124],"leverages":[125],"inherent":[126],"consistencies":[127],"aspects:":[130],"(1)":[131],"Spatially,":[132],"each":[133],"frame":[134],"any":[136],"cropped":[138],"naturally":[141],"form":[142],"image-bbox":[144],"(bounding":[145],"box)":[146],"pair":[147],"self-training;":[149],"(2)":[150],"Temporally,":[151],"bidirectional":[152],"trajectories":[154],"exhibit":[156],"minimal":[157],"differences;":[158],"(3)":[159],"Semantically,":[160],"correlation":[162],"between":[163],"visual":[164],"textual":[166],"features":[167,204],"is":[168,176],"expected":[169],"remain":[171],"consistent.":[172],"Furthermore,":[173],"framework":[175],"validated":[177],"with":[178,230],"simple":[180],"effective":[182],"devised,":[185],"named":[186],"ATTracker":[187,213],"(Asymmetrical":[188],"Transformer":[189],"Tracker).":[190],"It":[191],"modifies":[192],"self-attention":[194],"operation":[195],"asymmetrical":[198],"way,":[199],"striving":[200],"enhance":[202],"target-related":[203],"while":[205],"suppressing":[206],"noise.":[207],"Extensive":[208],"experiments":[209],"confirm":[210],"serves":[214],"as":[215],"robust":[217],"baseline,":[218],"outperforming":[219],"fully":[220],"supervised":[221],"base":[222],"trackers.":[223],"By":[224],"unveiling":[225],"potential":[227],"limited":[231],"annotations,":[232],"study":[234],"aims":[235],"attract":[237],"attention":[238],"pave":[240],"way":[242],"Semi-supervised":[244],"(SS-VLT).":[247]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":6}],"updated_date":"2026-04-01T17:29:45.350535","created_date":"2025-10-10T00:00:00"}
