{"id":"https://openalex.org/W7134809100","doi":"https://doi.org/10.48550/arxiv.2603.07660","title":"Holi-Spatial: Evolving Video Streams into Holistic 3D Spatial Intelligence","display_name":"Holi-Spatial: Evolving Video Streams into Holistic 3D Spatial Intelligence","publication_year":2026,"publication_date":"2026-03-08","ids":{"openalex":"https://openalex.org/W7134809100","doi":"https://doi.org/10.48550/arxiv.2603.07660"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.07660","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128689591","display_name":"Yuanyuan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gao, Yuanyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128678190","display_name":"Hao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128638740","display_name":"Yifei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yifei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036569224","display_name":"Xinhao Ji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Xinhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125048445","display_name":"Yuning Gong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gong, Yuning","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064361747","display_name":"Yuanjun Liao","orcid":"https://orcid.org/0000-0002-9881-9879"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liao, Yuanjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128653516","display_name":"Fangfu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Fangfu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128638476","display_name":"Manyuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Manyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128678366","display_name":"Yuchen Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yuchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128654764","display_name":"Dan Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Dan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128660166","display_name":"Xue Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049609631","display_name":"Huaxi Huang","orcid":"https://orcid.org/0000-0002-6837-6747"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Huaxi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128645757","display_name":"Hongjie Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hongjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128653243","display_name":"Ziwei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ziwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128637031","display_name":"Xiao Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Xiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128634183","display_name":"Dingwen Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Dingwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128657177","display_name":"Zhihang Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Zhihang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":17,"corresponding_author_ids":["https://openalex.org/A5128689591"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8658999800682068,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8658999800682068,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.049800001084804535,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.008200000040233135,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.626800000667572},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.6132000088691711},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5659999847412109},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4706000089645386},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4142000079154968},{"id":"https://openalex.org/keywords/spatial-analysis","display_name":"Spatial analysis","score":0.4092999994754791},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.39969998598098755},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.39719998836517334},{"id":"https://openalex.org/keywords/geospatial-analysis","display_name":"Geospatial analysis","score":0.38190001249313354}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8436999917030334},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.626800000667572},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.6132000088691711},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5753999948501587},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5659999847412109},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4706000089645386},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4142000079154968},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.4092999994754791},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.39969998598098755},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.39719998836517334},{"id":"https://openalex.org/C9770341","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Geospatial analysis","level":2,"score":0.38190001249313354},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.3790999948978424},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.3736000061035156},{"id":"https://openalex.org/C203689450","wikidata":"https://www.wikidata.org/wiki/Q2302053","display_name":"Spatial database","level":3,"score":0.3734000027179718},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.3513999879360199},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3384000062942505},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.3352000117301941},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3158999979496002},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3102000057697296},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.30399999022483826},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.2946999967098236},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2809000015258789},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.07660","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.07660","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.07660","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.07660","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"pursuit":[1],"of":[2,31],"spatial":[3,19,101,125,167,208],"intelligence":[4],"fundamentally":[5],"relies":[6],"on":[7,194,207],"access":[8],"to":[9,116,217],"large-scale,":[10,80,141],"fine-grained":[11],"3D":[12,41,107,143,155,162],"data.":[13,46],"However,":[14],"existing":[15,188],"approaches":[16],"predominantly":[17],"construct":[18,137],"understanding":[20],"benchmarks":[21],"by":[22,61],"generating":[23],"question-answer":[24],"(QA)":[25,127],"pairs":[26,169],"from":[27,43,85,104],"a":[28,48,130],"limited":[29],"number":[30],"manually":[32],"annotated":[33],"datasets,":[34],"rather":[35],"than":[36],"systematically":[37],"annotating":[38],"new":[39],"large-scale":[40],"scenes":[42],"raw":[44,86],"web":[45],"As":[47],"result,":[49],"their":[50],"scalability":[51],"is":[52,58],"severely":[53],"constrained,":[54],"and":[55,118,132,165,174,190,200],"model":[56,221],"performance":[57,181],"further":[59,136],"hindered":[60],"domain":[62],"gaps":[63],"inherent":[64],"in":[65,182,220],"these":[66],"narrowly":[67],"curated":[68],"datasets.":[69],"In":[70],"this":[71,212],"work,":[72],"we":[73,135],"propose":[74],"Holi-Spatial,":[75],"the":[76,93,139],"first":[77,140],"fully":[78],"automated,":[79],"spatially-aware":[81],"multimodal":[82],"dataset,":[83,145],"constructed":[84],"video":[87],"inputs":[88],"without":[89],"human":[90],"intervention,":[91],"using":[92,211],"proposed":[94],"data":[95,183],"curation":[96,184],"pipeline.":[97],"Holi-Spatial":[98,178],"supports":[99],"multi-level":[100],"supervision,":[102],"ranging":[103],"geometrically":[105],"accurate":[106],"Gaussian":[108],"Splatting":[109],"(3DGS)":[110],"reconstructions":[111],"with":[112,123],"rendered":[113],"depth":[114],"maps":[115],"object-level":[117],"relational":[119],"semantic":[120,144,175],"annotations,":[121],"together":[122],"corresponding":[124],"Question-Answer":[126],"pairs.":[128],"Following":[129],"principled":[131],"systematic":[133],"pipeline,":[134],"Holi-Spatial-4M,":[138],"high-quality":[142],"containing":[146],"12K":[147],"optimized":[148,192],"3DGS":[149],"scenes,":[150],"1.3M":[151],"2D":[152],"masks,":[153],"320K":[154,158],"bounding":[156],"boxes,":[157],"instance":[159],"captions,":[160],"1.2M":[161,166],"grounding":[163],"instances,":[164],"QA":[168],"spanning":[170],"diverse":[171],"geometric,":[172],"relational,":[173],"reasoning":[176,209],"tasks.":[177],"demonstrates":[179],"exceptional":[180],"quality,":[185],"significantly":[186],"outperforming":[187],"feed-forward":[189],"per-scene":[191],"methods":[193],"datasets":[195],"such":[196],"as":[197],"ScanNet,":[198],"ScanNet++,":[199],"DL3DV.":[201],"Furthermore,":[202],"fine-tuning":[203],"Vision-Language":[204],"Models":[205],"(VLMs)":[206],"tasks":[210],"dataset":[213],"has":[214],"also":[215],"led":[216],"substantial":[218],"improvements":[219],"performance.":[222]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-11T00:00:00"}
