{"id":"https://openalex.org/W7133529445","doi":"https://doi.org/10.48550/arxiv.2603.02573","title":"Track4World: Feedforward World-centric Dense 3D Tracking of All Pixels","display_name":"Track4World: Feedforward World-centric Dense 3D Tracking of All Pixels","publication_year":2026,"publication_date":"2026-03-03","ids":{"openalex":"https://openalex.org/W7133529445","doi":"https://doi.org/10.48550/arxiv.2603.02573"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.02573","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128075286","display_name":"Jiahao Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lu, Jiahao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128101516","display_name":"Jiayi Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jiayi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128106270","display_name":"Wenbo Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Wenbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128094937","display_name":"Ruijie Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Ruijie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128046770","display_name":"Chengfeng Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Chengfeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128117867","display_name":"Sai-Kit Yeung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yeung, Sai-Kit","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128069035","display_name":"Ying Shan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shan, Ying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128047414","display_name":"Yuan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yuan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5128075286"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9388999938964844,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9388999938964844,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.006200000178068876,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.005799999926239252,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.6503999829292297},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6308000087738037},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.630299985408783},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.5871999859809875},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5069000124931335},{"id":"https://openalex.org/keywords/feed-forward","display_name":"Feed forward","score":0.4814999997615814},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.40290001034736633},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.39629998803138733}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7705000042915344},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7486000061035156},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6766999959945679},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.6503999829292297},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6308000087738037},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.630299985408783},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.5871999859809875},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5069000124931335},{"id":"https://openalex.org/C38858127","wikidata":"https://www.wikidata.org/wiki/Q5441228","display_name":"Feed forward","level":2,"score":0.4814999997615814},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.40290001034736633},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.39629998803138733},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.3873000144958496},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.31540000438690186},{"id":"https://openalex.org/C154586513","wikidata":"https://www.wikidata.org/wiki/Q4420972","display_name":"Tracking system","level":3,"score":0.30390000343322754},{"id":"https://openalex.org/C172849965","wikidata":"https://www.wikidata.org/wiki/Q3148875","display_name":"Reference frame","level":3,"score":0.266400009393692},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.26589998602867126},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.26499998569488525},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.2597000002861023},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25600001215934753},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.02573","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.02573","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02573","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.02573","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11","score":0.5883815288543701}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Estimating":[0],"the":[1,20,42,73,79,99,116],"3D":[2,21,27,67,81,93,103,118,123,149],"trajectory":[3],"of":[4,19,23,69,125,128],"every":[5,70,126],"pixel":[6,71,127],"from":[7],"a":[8,16,46,58,86,91],"monocular":[9,26],"video":[10],"is":[11],"crucial":[12],"and":[13,102,148,154],"promising":[14],"for":[15,50,156],"comprehensive":[17],"understanding":[18],"dynamics":[22],"videos.":[24],"Recent":[25],"tracking":[28,38,68,124],"works":[29],"demonstrate":[30,136],"impressive":[31],"performance,":[32],"but":[33],"are":[34],"limited":[35],"to":[36,96],"either":[37],"sparse":[39],"points":[40],"on":[41,78,133],"first":[43],"frame":[44,108],"or":[45],"slow":[47],"optimization-based":[48],"framework":[49],"dense":[51,104],"tracking.":[52],"In":[53],"this":[54,129],"paper,":[55],"we":[56],"propose":[57],"feedforward":[59],"model,":[60],"called":[61],"Track4World,":[62],"enabling":[63],"an":[64],"efficient":[65,122],"holistic":[66],"in":[72,144],"world-centric":[74],"coordinate":[75],"system.":[76],"Built":[77],"global":[80],"scene":[82,112],"representation":[83],"encoded":[84],"by":[85],"VGGT-style":[87],"ViT,":[88],"Track4World":[89],"applies":[90],"novel":[92],"correlation":[94],"scheme":[95],"simultaneously":[97],"estimate":[98],"pixel-wise":[100],"2D":[101],"flow":[105,146],"between":[106],"arbitrary":[107],"pairs.":[109],"The":[110],"estimated":[111],"flow,":[113],"along":[114],"with":[115],"reconstructed":[117],"geometry,":[119],"enables":[120],"subsequent":[121],"video.":[130],"Extensive":[131],"experiments":[132],"multiple":[134],"benchmarks":[135],"that":[137],"our":[138],"approach":[139],"consistently":[140],"outperforms":[141],"existing":[142],"methods":[143],"2D/3D":[145],"estimation":[147],"tracking,":[150],"highlighting":[151],"its":[152],"robustness":[153],"scalability":[155],"real-world":[157],"4D":[158],"reconstruction":[159],"tasks.":[160]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-05T00:00:00"}
