{"id":"https://openalex.org/W7154728512","doi":"https://doi.org/10.48550/arxiv.2604.14526","title":"FreqTrack: Frequency Learning based Vision Transformer for RGB-Event Object Tracking","display_name":"FreqTrack: Frequency Learning based Vision Transformer for RGB-Event Object Tracking","publication_year":2026,"publication_date":"2026-04-16","ids":{"openalex":"https://openalex.org/W7154728512","doi":"https://doi.org/10.48550/arxiv.2604.14526"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.14526","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14526","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.14526","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037442437","display_name":"Jinlin You","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"You, Jinlin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133904790","display_name":"Muyu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Muyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133875530","display_name":"Xudong Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Xudong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.5177000164985657,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.5177000164985657,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.054499998688697815,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.05299999937415123,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/wavelet","display_name":"Wavelet","score":0.5339999794960022},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.43650001287460327},{"id":"https://openalex.org/keywords/bittorrent-tracker","display_name":"BitTorrent tracker","score":0.43540000915527344},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.40310001373291016},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39809998869895935},{"id":"https://openalex.org/keywords/frequency-domain","display_name":"Frequency domain","score":0.39169999957084656},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.3905999958515167},{"id":"https://openalex.org/keywords/wavelet-transform","display_name":"Wavelet transform","score":0.3896999955177307},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.384799987077713},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.3824999928474426}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6991999745368958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6854000091552734},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6244999766349792},{"id":"https://openalex.org/C47432892","wikidata":"https://www.wikidata.org/wiki/Q831390","display_name":"Wavelet","level":2,"score":0.5339999794960022},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.43650001287460327},{"id":"https://openalex.org/C57501372","wikidata":"https://www.wikidata.org/wiki/Q2021268","display_name":"BitTorrent tracker","level":3,"score":0.43540000915527344},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.40310001373291016},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39809998869895935},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.39169999957084656},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.3905999958515167},{"id":"https://openalex.org/C196216189","wikidata":"https://www.wikidata.org/wiki/Q2867","display_name":"Wavelet transform","level":3,"score":0.3896999955177307},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.384799987077713},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3824999928474426},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.35690000653266907},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.3458000123500824},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.3303000032901764},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.30239999294281006},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.29660001397132874},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.29109999537467957},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.28769999742507935},{"id":"https://openalex.org/C154586513","wikidata":"https://www.wikidata.org/wiki/Q4420972","display_name":"Tracking system","level":3,"score":0.27970001101493835},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2687000036239624},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C103824480","wikidata":"https://www.wikidata.org/wiki/Q185889","display_name":"Time domain","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.2547000050544739}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.14526","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14526","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.14526","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14526","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Existing":[0],"single-modal":[1],"RGB":[2],"trackers":[3],"often":[4],"face":[5],"performance":[6],"bottlenecks":[7],"in":[8,33,129],"complex":[9],"dynamic":[10,92],"scenes,":[11],"while":[12],"the":[13,34,47,137,156,160],"introduction":[14],"of":[15,54,153,162],"event":[16,55,123],"sensors":[17],"offers":[18],"new":[19],"potential":[20],"for":[21,76,165],"enhancing":[22],"tracking":[23,66],"capabilities.":[24],"However,":[25],"most":[26],"current":[27],"RGB-event":[28],"fusion":[29],"methods,":[30],"primarily":[31],"designed":[32],"spatial":[35],"domain":[36],"using":[37],"convolutional,":[38],"Transformer,":[39],"or":[40],"Mamba":[41],"architectures,":[42],"fail":[43],"to":[44,95,116],"fully":[45],"exploit":[46],"unique":[48],"temporal":[49],"response":[50],"and":[51,98,131,139],"high-frequency":[52],"characteristics":[53],"data.":[56],"To":[57],"address":[58],"this,":[59],"we1":[60],"propose":[61],"FreqTrack,":[62],"a":[63,83,105],"frequency-aware":[64],"RGBE":[65,166],"framework":[67],"that":[68,89,143],"establishes":[69],"complementary":[70],"inter-modal":[71],"correlations":[72],"through":[73],"frequency-domain":[74,100,163],"transformations":[75],"more":[77],"robust":[78],"feature":[79],"fusion.":[80],"We":[81],"design":[82],"Spectral":[84],"Enhancement":[85],"Transformer":[86],"(SET)":[87],"layer":[88],"incorporates":[90],"multi-head":[91],"Fourier":[93],"filtering":[94],"adaptively":[96],"enhance":[97],"select":[99],"features.":[101],"Additionally,":[102],"we":[103],"develop":[104],"Wavelet":[106],"Edge":[107],"Refinement":[108],"(WER)":[109],"module,":[110],"which":[111],"leverages":[112],"learnable":[113],"wavelet":[114],"transforms":[115],"explicitly":[117],"extract":[118],"multi-scale":[119],"edge":[120],"structures":[121],"from":[122],"data,":[124],"effectively":[125],"improving":[126],"modeling":[127,164],"capability":[128],"high-speed":[130],"low-light":[132],"scenarios.":[133],"Extensive":[134],"experiments":[135],"on":[136,155],"COESOT":[138,157],"FE108":[140],"datasets":[141],"demonstrate":[142],"FreqTrack":[144],"achieves":[145],"highly":[146],"competitive":[147],"performance,":[148],"particularly":[149],"attaining":[150],"leading":[151],"precision":[152],"76.6\\%":[154],"benchmark,":[158],"validating":[159],"effectiveness":[161],"tracking.":[167]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-18T00:00:00"}
