{"id":"https://openalex.org/W7134040750","doi":"https://doi.org/10.48550/arxiv.2603.04989","title":"TAPFormer: Robust Arbitrary Point Tracking via Transient Asynchronous Fusion of Frames and Events","display_name":"TAPFormer: Robust Arbitrary Point Tracking via Transient Asynchronous Fusion of Frames and Events","publication_year":2026,"publication_date":"2026-03-05","ids":{"openalex":"https://openalex.org/W7134040750","doi":"https://doi.org/10.48550/arxiv.2603.04989"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.04989","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034185345","display_name":"Jiaxiong Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jiaxiong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128269937","display_name":"Zhen Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Zhen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101490398","display_name":"Jinpu Zhang","orcid":"https://orcid.org/0000-0002-0617-0452"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jinpu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128275988","display_name":"Yi Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128254966","display_name":"Hui Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Hui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128220489","display_name":"Xieyuanli Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xieyuanli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5055104872","display_name":"DeWen HU","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Dewen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.5414000153541565,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.5414000153541565,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.0731000006198883,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12153","display_name":"Advanced Optical Sensing Technologies","score":0.05900000035762787,"subfield":{"id":"https://openalex.org/subfields/3105","display_name":"Instrumentation"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.7049999833106995},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6366000175476074},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5109000205993652},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.49219998717308044},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.47189998626708984},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.453900009393692},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.4332999885082245},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.40549999475479126},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.4043999910354614}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7549999952316284},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7084000110626221},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7075999975204468},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.7049999833106995},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6366000175476074},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5109000205993652},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.49219998717308044},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.47189998626708984},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.453900009393692},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.4332999885082245},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.40549999475479126},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.4043999910354614},{"id":"https://openalex.org/C154586513","wikidata":"https://www.wikidata.org/wiki/Q4420972","display_name":"Tracking system","level":3,"score":0.3779999911785126},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.37439998984336853},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.3546999990940094},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.351500004529953},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3093000054359436},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.3075999915599823},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.29910001158714294},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.2955999970436096},{"id":"https://openalex.org/C2780799671","wikidata":"https://www.wikidata.org/wiki/Q17087362","display_name":"Transient (computer programming)","level":2,"score":0.28940001130104065},{"id":"https://openalex.org/C200336642","wikidata":"https://www.wikidata.org/wiki/Q15058706","display_name":"Point set registration","level":3,"score":0.28139999508857727},{"id":"https://openalex.org/C119666444","wikidata":"https://www.wikidata.org/wiki/Q5977280","display_name":"Temporal resolution","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C2778999744","wikidata":"https://www.wikidata.org/wiki/Q7208292","display_name":"Point target","level":3,"score":0.26010000705718994},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2563999891281128}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.04989","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.04989","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.04989","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.04989","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.4898100793361664,"display_name":"Reduced inequalities"},{"id":"https://metadata.un.org/sdg/16","score":0.4155496656894684,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Tracking":[0],"any":[1],"point":[2,72,159,174],"(TAP)":[3],"is":[4,77],"a":[5,55,78,108,143,162],"fundamental":[6],"yet":[7,32],"challenging":[8],"task":[9],"in":[10,165],"computer":[11],"vision,":[12],"requiring":[13],"high":[14],"precision":[15],"and":[16,26,45,65,69,103,125,152],"long-term":[17],"motion":[18,153],"reasoning.":[19],"Recent":[20],"attempts":[21],"to":[22,42,120],"combine":[23],"RGB":[24],"frames":[25,64,92,102],"event":[27,95],"streams":[28],"have":[29],"shown":[30],"promise,":[31],"they":[33],"typically":[34],"rely":[35],"on":[36,172],"synchronous":[37],"or":[38,131],"non-adaptive":[39],"fusion,":[40],"leading":[41],"temporal":[43,88],"misalignment":[44],"severe":[46],"degradation":[47],"when":[48],"one":[49],"modality":[50,121],"fails.":[51],"We":[52],"introduce":[53],"TAPFormer,":[54],"transformer-based":[56],"framework":[57],"that":[58],"performs":[59],"asynchronous":[60],"temporal-consistent":[61],"fusion":[62],"of":[63],"events":[66],"for":[67],"robust":[68],"high-frequency":[70],"arbitrary":[71],"tracking.":[73],"Our":[74,155],"key":[75],"innovation":[76],"Transient":[79],"Asynchronous":[80],"Fusion":[81,112],"(TAF)":[82],"mechanism,":[83],"which":[84],"explicitly":[85],"models":[86],"the":[87,98,181],"evolution":[89],"between":[90,100],"discrete":[91],"through":[93],"continuous":[94],"updates,":[96],"bridging":[97],"gap":[99],"low-rate":[101],"high-rate":[104],"events.":[105],"In":[106],"addition,":[107],"Cross-modal":[109],"Locally":[110],"Weighted":[111],"(CLWF)":[113],"module":[114],"adaptively":[115],"adjusts":[116],"spatial":[117],"attention":[118],"according":[119],"reliability,":[122],"yielding":[123],"stable":[124],"discriminative":[126],"features":[127],"even":[128],"under":[129,138,149],"blur":[130],"low":[132],"light.":[133],"To":[134],"evaluate":[135],"our":[136,177],"approach":[137],"realistic":[139],"conditions,":[140],"we":[141],"construct":[142],"novel":[144],"real-world":[145],"frame-event":[146],"TAP":[147],"dataset":[148],"diverse":[150],"illumination":[151],"conditions.":[154],"method":[156],"outperforms":[157],"existing":[158],"trackers,":[160],"achieving":[161],"28.2%":[163],"improvement":[164],"average":[166],"pixel":[167],"error":[168],"within":[169],"threshold.":[170],"Moreover,":[171],"standard":[173],"tracking":[175],"benchmarks,":[176],"tracker":[178],"consistently":[179],"achieves":[180],"best":[182],"performance.":[183],"Project":[184],"website:":[185],"tapformer.github.io":[186]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-07T00:00:00"}
