{"id":"https://openalex.org/W7154209072","doi":"https://doi.org/10.48550/arxiv.2604.10582","title":"TAPNext++: What's Next for Tracking Any Point (TAP)?","display_name":"TAPNext++: What's Next for Tracking Any Point (TAP)?","publication_year":2026,"publication_date":"2026-04-12","ids":{"openalex":"https://openalex.org/W7154209072","doi":"https://doi.org/10.48550/arxiv.2604.10582"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10582","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10582","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10582","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133616200","display_name":"Sebastian Jung","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jung, Sebastian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032713067","display_name":"Artem Zholus","orcid":"https://orcid.org/0000-0003-3167-3585"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zholus, Artem","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064985080","display_name":"Martin Sundermeyer","orcid":"https://orcid.org/0000-0003-0587-9643"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sundermeyer, Martin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081047759","display_name":"Carl Doersch","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Doersch, Carl","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029480918","display_name":"Ross Goroshin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goroshin, Ross","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051298870","display_name":"David Joseph Tan","orcid":"https://orcid.org/0000-0002-7835-6280"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, David Joseph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120402680","display_name":"Sarath Chandar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chandar, Sarath","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050478468","display_name":"Rudolph Triebel","orcid":"https://orcid.org/0000-0002-7975-036X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Triebel, Rudolph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133619948","display_name":"Federico Tombari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tombari, Federico","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5133616200"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.5519000291824341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.5519000291824341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.1462000012397766,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.06769999861717224,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6098999977111816},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.6046000123023987},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.4778999984264374},{"id":"https://openalex.org/keywords/active-appearance-model","display_name":"Active appearance model","score":0.3691999912261963},{"id":"https://openalex.org/keywords/jaccard-index","display_name":"Jaccard index","score":0.3619999885559082},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.33889999985694885},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.33550000190734863},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.3206000030040741}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6929000020027161},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6692000031471252},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6098999977111816},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6057999730110168},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.6046000123023987},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.4778999984264374},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.3691999912261963},{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.3619999885559082},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.33889999985694885},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.33550000190734863},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.3206000030040741},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3075999915599823},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.3052999973297119},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.2955999970436096},{"id":"https://openalex.org/C89992363","wikidata":"https://www.wikidata.org/wiki/Q5961558","display_name":"Track (disk drive)","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C174440990","wikidata":"https://www.wikidata.org/wiki/Q681349","display_name":"Point-to-point","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.27810001373291016},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.27480000257492065},{"id":"https://openalex.org/C154586513","wikidata":"https://www.wikidata.org/wiki/Q4420972","display_name":"Tracking system","level":3,"score":0.26969999074935913},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26249998807907104},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.25380000472068787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10582","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10582","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10582","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10582","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Tracking-Any-Point":[0],"(TAP)":[1],"models":[2],"aim":[3],"to":[4,32,62,148],"track":[5,33],"any":[6],"point":[7,171,187],"through":[8],"a":[9,13,37,81,132,141,191],"video":[10,56,109],"which":[11],"is":[12,131],"crucial":[14],"task":[15],"in":[16,36,86,135],"AR/XR":[17],"and":[18,58,99,139,173,189,198],"robotics":[19],"applications.":[20],"The":[21],"recently":[22],"introduced":[23],"TAPNext":[24,52],"approach":[25],"proposes":[26],"an":[27],"end-to-end,":[28],"recurrent":[29,108,180],"transformer":[30,110],"architecture":[31],"points":[34,65,85],"frame-by-frame":[35],"purely":[38],"online":[39],"fashion":[40],"--":[41],"demonstrating":[42],"competitive":[43],"performance":[44,130],"at":[45,203],"minimal":[46],"latency.":[47],"However,":[48],"we":[49,78,160],"show":[50],"that":[51,66,83,88,128,169,179],"struggles":[53],"with":[54],"longer":[55,93],"sequences":[57,87,120],"also":[59],"frequently":[60],"fails":[61],"re-detect":[63],"query":[64],"reappear":[67],"after":[68],"being":[69],"occluded":[70,175],"or":[71],"leaving":[72],"the":[73,96,103,107,136],"frame.":[74],"In":[75],"this":[76],"work,":[77],"present":[79],"TAPNext++,":[80],"model":[82],"tracks":[84],"are":[89],"orders":[90],"of":[91,102,158],"magnitude":[92],"while":[94],"preserving":[95],"low":[97],"memory":[98],"compute":[100],"footprint":[101],"architecture.":[104],"We":[105,126,177],"train":[106],"using":[111],"several":[112],"data-driven":[113],"solutions,":[114],"including":[115],"training":[116],"on":[117,152,194],"long":[118],"1024-frame":[119],"enabled":[121],"by":[122],"sequence":[123],"parallelism":[124],"techniques.":[125],"highlight":[127],"re-detection":[129,157],"blind":[133],"spot":[134],"current":[137],"literature":[138],"introduce":[140,161],"new":[142,192],"metric,":[143],"Re-Detection":[144],"Average":[145],"Jaccard":[146],"($AJ_{RD}$),":[147],"explicitly":[149],"evaluate":[150],"tracking":[151,188],"re-appearing":[153],"points.":[154,176],"To":[155],"improve":[156],"points,":[159],"tailored":[162],"geometric":[163],"augmentations,":[164],"such":[165],"as":[166],"periodic":[167],"roll":[168],"simulates":[170],"re-entries,":[172],"supervising":[174],"demonstrate":[178],"transformers":[181],"can":[182,200],"be":[183,201],"substantially":[184],"improved":[185],"for":[186],"set":[190],"state-of-the-art":[193],"multiple":[195],"benchmarks.":[196],"Model":[197],"code":[199],"found":[202],"https://tap-next-plus-plus.github.io.":[204]},"counts_by_year":[],"updated_date":"2026-04-15T06:04:33.058270","created_date":"2026-04-15T00:00:00"}
