{"id":"https://openalex.org/W7135169359","doi":"https://doi.org/10.48550/arxiv.2603.12255","title":"Spatial-TTT: Streaming Visual-based Spatial Intelligence with Test-Time Training","display_name":"Spatial-TTT: Streaming Visual-based Spatial Intelligence with Test-Time Training","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7135169359","doi":"https://doi.org/10.48550/arxiv.2603.12255"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.12255","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12255","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.12255","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128985170","display_name":"Fangfu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Fangfu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129091885","display_name":"Diankun Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Diankun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129073541","display_name":"Jiawei Chi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chi, Jiawei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128947199","display_name":"Yimo Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Yimo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070505006","display_name":"Yi\u2010Hsin Hung","orcid":"https://orcid.org/0000-0002-9940-2055"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hung, Yi-Hsin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128978141","display_name":"Xumin Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Xumin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129071196","display_name":"Hao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128941290","display_name":"Han Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Han","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082694832","display_name":"Yongming Rao","orcid":"https://orcid.org/0000-0003-3952-8753"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rao, Yongming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129047966","display_name":"Yueqi Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Yueqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.12849999964237213,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.12849999964237213,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.10140000283718109,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.0966000035405159,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spatial-contextual-awareness","display_name":"Spatial contextual awareness","score":0.7577000260353088},{"id":"https://openalex.org/keywords/spatial-analysis","display_name":"Spatial analysis","score":0.6814000010490417},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5491999983787537},{"id":"https://openalex.org/keywords/spatial-configuration","display_name":"Spatial configuration","score":0.46700000762939453},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.43560001254081726},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.4043000042438507},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.40230000019073486},{"id":"https://openalex.org/keywords/spatial-database","display_name":"Spatial database","score":0.4002000093460083}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8246999979019165},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.7577000260353088},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.6814000010490417},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5626999735832214},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5491999983787537},{"id":"https://openalex.org/C2781179811","wikidata":"https://www.wikidata.org/wiki/Q28134081","display_name":"Spatial configuration","level":3,"score":0.46700000762939453},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.43560001254081726},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.4043000042438507},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4032999873161316},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.40230000019073486},{"id":"https://openalex.org/C203689450","wikidata":"https://www.wikidata.org/wiki/Q2302053","display_name":"Spatial database","level":3,"score":0.4002000093460083},{"id":"https://openalex.org/C30038468","wikidata":"https://www.wikidata.org/wiki/Q4354775","display_name":"Memorization","level":2,"score":0.38659998774528503},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.3831000030040741},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.35659998655319214},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3528999984264374},{"id":"https://openalex.org/C158709400","wikidata":"https://www.wikidata.org/wiki/Q3578586","display_name":"Spatial ecology","level":2,"score":0.32600000500679016},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.30730000138282776},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.2728999853134155},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26739999651908875},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.25850000977516174}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.12255","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12255","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.12255","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12255","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.7287235856056213,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Humans":[0],"perceive":[1],"and":[2,18,48,77,91,130,158,177],"understand":[3],"real-world":[4],"spaces":[5],"through":[6],"a":[7,69,88,111,140,165],"stream":[8],"of":[9,71],"visual":[10],"observations.":[11],"Therefore,":[12],"the":[13,124,149],"ability":[14],"to":[15,75,115,126,151,156],"streamingly":[16],"maintain":[17],"update":[19,152],"spatial":[20,30,43,61,79,101,107,145,162,175,183],"evidence":[21,80],"from":[22],"potentially":[23],"unbounded":[24],"video":[25,102,182],"streams":[26],"is":[27,35,45],"essential":[28],"for":[29,99],"intelligence.":[31],"The":[32],"core":[33],"challenge":[34],"not":[36],"simply":[37],"longer":[38],"context":[39],"windows":[40],"but":[41],"how":[42],"information":[44],"selected,":[46],"organized,":[47],"retained":[49],"over":[50,81],"time.":[51],"In":[52],"this":[53],"paper,":[54],"we":[55,86,109,138],"propose":[56],"Spatial-TTT":[57,172],"towards":[58],"streaming":[59],"visual-based":[60],"intelligence":[62],"with":[63,96,118,142],"test-time":[64],"training":[65],"(TTT),":[66],"which":[67,122,147],"adapts":[68],"subset":[70],"parameters":[72],"(fast":[73],"weights)":[74],"capture":[76,127],"organize":[78,159],"long-horizon":[82,174],"scene":[83],"videos.":[84],"Specifically,":[85],"design":[87],"hybrid":[89],"architecture":[90,136],"adopt":[92],"large-chunk":[93],"updates":[94],"parallel":[95],"sliding-window":[97],"attention":[98],"efficient":[100],"processing.":[103],"To":[104],"further":[105],"promote":[106],"awareness,":[108],"introduce":[110],"spatial-predictive":[112],"mechanism":[113],"applied":[114],"TTT":[116],"layers":[117],"3D":[119,144,161],"spatiotemporal":[120],"convolution,":[121],"encourages":[123],"model":[125,150],"geometric":[128],"correspondence":[129],"temporal":[131],"continuity":[132],"across":[133],"frames.":[134],"Beyond":[135],"design,":[137],"construct":[139],"dataset":[141],"dense":[143],"descriptions,":[146],"guides":[148],"its":[153],"fast":[154],"weights":[155],"memorize":[157],"global":[160],"signals":[163],"in":[164],"structured":[166],"manner.":[167],"Extensive":[168],"experiments":[169],"demonstrate":[170],"that":[171],"improves":[173],"understanding":[176],"achieves":[178],"state-of-the-art":[179],"performance":[180],"on":[181],"benchmarks.":[184],"Project":[185],"page:":[186],"https://liuff19.github.io/Spatial-TTT.":[187]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-14T00:00:00"}
