{"id":"https://openalex.org/W7146995672","doi":"https://doi.org/10.48550/arxiv.2603.27967","title":"Learning Multi-View Spatial Reasoning from Cross-View Relations","display_name":"Learning Multi-View Spatial Reasoning from Cross-View Relations","publication_year":2026,"publication_date":"2026-03-30","ids":{"openalex":"https://openalex.org/W7146995672","doi":"https://doi.org/10.48550/arxiv.2603.27967"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.27967","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27967","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.27967","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101369767","display_name":"Suchae Jeong","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jeong, Suchae","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132672141","display_name":"Jaehwi Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Jaehwi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Lee, Haeone","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Haeone","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132699210","display_name":"Hanna Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Hanna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132591935","display_name":"Jian Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Jian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132648891","display_name":"Dongjun Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Dongjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132725992","display_name":"Dong Kyu Shin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shin, Dong Kyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128192559","display_name":"Changyeon Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Changyeon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132636595","display_name":"Dongyoon Hahm","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hahm, Dongyoon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132605691","display_name":"Woogyeol Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Woogyeol","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132703580","display_name":"Juheon Choi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Juheon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132577246","display_name":"Kimin Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Kimin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5101369767"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9053000211715698,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9053000211715698,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.01940000057220459,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.011099999770522118,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.8501999974250793},{"id":"https://openalex.org/keywords/spatial-relation","display_name":"Spatial relation","score":0.7670000195503235},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.6220999956130981},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.6111000180244446},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5121999979019165},{"id":"https://openalex.org/keywords/qualitative-reasoning","display_name":"Qualitative reasoning","score":0.4921000003814697},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.46000000834465027},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4318999946117401},{"id":"https://openalex.org/keywords/spatial-contextual-awareness","display_name":"Spatial contextual awareness","score":0.38519999384880066}],"concepts":[{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.8501999974250793},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.7670000195503235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.689300000667572},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6657999753952026},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.6220999956130981},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.6111000180244446},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5121999979019165},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.4921000003814697},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.46000000834465027},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4318999946117401},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.38519999384880066},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3799000084400177},{"id":"https://openalex.org/C2778662690","wikidata":"https://www.wikidata.org/wiki/Q3125339","display_name":"Spatial ability","level":3,"score":0.3747999966144562},{"id":"https://openalex.org/C2777371692","wikidata":"https://www.wikidata.org/wiki/Q2178611","display_name":"Spatial cognition","level":3,"score":0.36980000138282776},{"id":"https://openalex.org/C192327766","wikidata":"https://www.wikidata.org/wiki/Q1038799","display_name":"Cognitive robotics","level":3,"score":0.36959999799728394},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.3617999851703644},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.35589998960494995},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3528999984264374},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.336899995803833},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.32839998602867126},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.3278999924659729},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.28200000524520874},{"id":"https://openalex.org/C2985665543","wikidata":"https://www.wikidata.org/wiki/Q3560550","display_name":"Spatial learning","level":3,"score":0.2651999890804291},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.26409998536109924},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C102993220","wikidata":"https://www.wikidata.org/wiki/Q387196","display_name":"Description logic","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C2986522900","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relationship","level":2,"score":0.25619998574256897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.27967","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27967","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.27967","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27967","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-language":[0],"models":[1],"(VLMs)":[2],"have":[3],"achieved":[4],"impressive":[5],"results":[6,122],"on":[7,91,96,119,127],"single-view":[8],"vision":[9],"tasks,":[10],"but":[11],"lack":[12],"the":[13],"multi-view":[14,98,133],"spatial":[15,48,72,82,101,129],"reasoning":[16,49,73,102,134],"capabilities":[17],"essential":[18],"for":[19],"embodied":[20],"AI":[21],"systems":[22],"to":[23,45,138],"understand":[24],"3D":[25,62],"environments":[26],"and":[27,64,84,99,105,135],"manipulate":[28],"objects":[29,77],"across":[30,50,78],"different":[31],"viewpoints.":[32],"In":[33],"this":[34],"work,":[35],"we":[36],"introduce":[37],"Cross-View":[38],"Relations":[39],"(XVR),":[40],"a":[41],"large-scale":[42],"dataset":[43],"designed":[44],"teach":[46],"VLMs":[47,89],"multiple":[51],"views.":[52],"XVR":[53,92],"comprises":[54],"100K":[55],"vision-question-answer":[56],"samples":[57],"derived":[58],"from":[59],"18K":[60],"diverse":[61],"scenes":[63],"70K":[65],"robotic":[66,100,140],"manipulation":[67],"trajectories,":[68],"spanning":[69],"three":[70],"fundamental":[71],"tasks:":[74],"Correspondence":[75],"(matching":[76],"views),":[79],"Verification":[80],"(validating":[81],"relationships),":[83],"Localization":[85],"(identifying":[86],"object":[87],"positions).":[88],"fine-tuned":[90],"achieve":[93],"substantial":[94],"improvements":[95],"established":[97],"benchmarks":[103],"(MindCube":[104],"RoboSpatial).":[106],"When":[107],"integrated":[108],"as":[109],"backbones":[110],"in":[111],"Vision-Language-Action":[112],"models,":[113],"XVR-trained":[114],"representations":[115],"improve":[116],"success":[117],"rates":[118],"RoboCasa.":[120],"Our":[121],"demonstrate":[123],"that":[124],"explicit":[125],"training":[126],"cross-view":[128],"relations":[130],"significantly":[131],"enhances":[132],"transfers":[136],"effectively":[137],"real-world":[139],"manipulation.":[141]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-04-02T00:00:00"}
