{"id":"https://openalex.org/W7134902886","doi":"https://doi.org/10.48550/arxiv.2603.09774","title":"World2Mind: Cognition Toolkit for Allocentric Spatial Reasoning in Foundation Models","display_name":"World2Mind: Cognition Toolkit for Allocentric Spatial Reasoning in Foundation Models","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7134902886","doi":"https://doi.org/10.48550/arxiv.2603.09774"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.09774","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09774","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.09774","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048633696","display_name":"Shouwei Ruan","orcid":"https://orcid.org/0009-0007-0481-5855"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ruan, Shouwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101516517","display_name":"Bin Wang","orcid":"https://orcid.org/0000-0002-7635-6535"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Bin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128751924","display_name":"Zhenyu Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Zhenyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015251615","display_name":"Qihui Zhu","orcid":"https://orcid.org/0009-0003-2490-7205"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Qihui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128797774","display_name":"Yuxiang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yuxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128693042","display_name":"Hang Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Hang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128763406","display_name":"Yubin Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yubin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5048633696"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.5605000257492065,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.5605000257492065,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11904","display_name":"Spatial Cognition and Navigation","score":0.3199999928474426,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.03150000050663948,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spatial-cognition","display_name":"Spatial cognition","score":0.6777999997138977},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.6643000245094299},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5651999711990356},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.49129998683929443},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.46380001306533813},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4636000096797943},{"id":"https://openalex.org/keywords/cognitive-map","display_name":"Cognitive map","score":0.41179999709129333},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.39989998936653137},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.39010000228881836}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7383000254631042},{"id":"https://openalex.org/C2777371692","wikidata":"https://www.wikidata.org/wiki/Q2178611","display_name":"Spatial cognition","level":3,"score":0.6777999997138977},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.6643000245094299},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6434000134468079},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5651999711990356},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.49129998683929443},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.46380001306533813},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4636000096797943},{"id":"https://openalex.org/C170494330","wikidata":"https://www.wikidata.org/wiki/Q1778434","display_name":"Cognitive map","level":3,"score":0.41179999709129333},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.39989998936653137},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.39010000228881836},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.3822999894618988},{"id":"https://openalex.org/C192327766","wikidata":"https://www.wikidata.org/wiki/Q1038799","display_name":"Cognitive robotics","level":3,"score":0.36469998955726624},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.3531999886035919},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.34439998865127563},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.32679998874664307},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3246999979019165},{"id":"https://openalex.org/C20854674","wikidata":"https://www.wikidata.org/wiki/Q4386060","display_name":"Cognitive architecture","level":3,"score":0.31040000915527344},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C161407221","wikidata":"https://www.wikidata.org/wiki/Q4382939","display_name":"Cognitive model","level":3,"score":0.28690001368522644},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.27570000290870667},{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.2743000090122223},{"id":"https://openalex.org/C196031653","wikidata":"https://www.wikidata.org/wiki/Q1501867","display_name":"Cartographic generalization","level":3,"score":0.267300009727478},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.25119999051094055},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.09774","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09774","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.09774","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09774","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Achieving":[0],"robust":[1,93],"spatial":[2,33,44,56,73,82,170],"reasoning":[3,34,126],"remains":[4],"a":[5,54,124],"fundamental":[6],"challenge":[7],"for":[8],"current":[9],"Multimodal":[10],"Foundation":[11],"Models":[12],"(MFMs).":[13],"Existing":[14],"methods":[15],"either":[16],"overfit":[17],"statistical":[18],"shortcuts":[19],"via":[20],"3D":[21,64,120,169],"grounding":[22],"data":[23],"or":[24],"remain":[25],"confined":[26],"to":[27,70,78,106],"2D":[28],"visual":[29],"perception,":[30],"limiting":[31],"both":[32],"accuracy":[35],"and":[36,66,87,135],"generalization":[37],"in":[38],"unseen":[39],"scenarios.":[40],"Inspired":[41],"by":[42,153],"the":[43,108,116,145,159],"cognitive":[45,74],"mapping":[46],"mechanisms":[47],"of":[48,89,111,119,147,176],"biological":[49],"intelligence,":[50],"we":[51,122],"propose":[52],"World2Mind,":[53],"training-free":[55],"intelligence":[57],"toolkit.":[58],"At":[59],"its":[60],"core,":[61],"World2Mind":[62,96,143],"leverages":[63],"reconstruction":[65],"instance":[67],"segmentation":[68],"models":[69,165],"construct":[71],"structured":[72],"maps,":[75],"empowering":[76],"MFMs":[77],"proactively":[79],"acquire":[80],"targeted":[81],"knowledge":[83],"regarding":[84],"interested":[85],"landmarks":[86,112],"routes":[88],"interest.":[90],"To":[91,114],"provide":[92],"geometric-topological":[94],"priors,":[95],"synthesizes":[97],"an":[98],"Allocentric-Spatial":[99],"Tree":[100],"(AST)":[101],"that":[102,142,175],"uses":[103],"elliptical":[104],"parameters":[105],"model":[107],"top-down":[109],"layout":[110],"accurately.":[113],"mitigate":[115],"inherent":[117],"inaccuracies":[118],"reconstruction,":[121],"introduce":[123],"three-stage":[125],"chain":[127],"comprising":[128],"tool":[129],"invocation":[130],"assessment,":[131],"modality-decoupled":[132],"cue":[133],"collection,":[134],"geometry-semantics":[136],"interwoven":[137],"reasoning.":[138],"Extensive":[139],"experiments":[140],"demonstrate":[141],"boosts":[144],"performance":[146,173],"frontier":[148],"models,":[149],"such":[150],"as":[151],"GPT-5.2,":[152],"5%~18%.":[154],"Astonishingly,":[155],"relying":[156],"solely":[157],"on":[158],"AST-structured":[160],"text,":[161],"purely":[162],"text-only":[163],"foundation":[164],"can":[166],"perform":[167],"complex":[168],"reasoning,":[171],"achieving":[172],"approaching":[174],"advanced":[177],"multimodal":[178],"models.":[179]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-03-12T00:00:00"}
