{"id":"https://openalex.org/W7128848049","doi":"https://doi.org/10.48550/arxiv.2602.11980","title":"Spatial Chain-of-Thought: Bridging Understanding and Generation Models for Spatial Reasoning Generation","display_name":"Spatial Chain-of-Thought: Bridging Understanding and Generation Models for Spatial Reasoning Generation","publication_year":2026,"publication_date":"2026-02-12","ids":{"openalex":"https://openalex.org/W7128848049","doi":"https://doi.org/10.48550/arxiv.2602.11980"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.11980","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125898381","display_name":"Wei Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009700188","display_name":"Yancheng Long","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Long, Yancheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075247695","display_name":"Mingqiao Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Mingqiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055214559","display_name":"Haojie Ding","orcid":"https://orcid.org/0000-0003-0731-1816"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Haojie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125990595","display_name":"Yankai Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yankai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125988608","display_name":"Hongyang Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Hongyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125972516","display_name":"Yi-Fan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yi-Fan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125977537","display_name":"Bin Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Bin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125914122","display_name":"Fan Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Fan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125930464","display_name":"Tingting Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Tingting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125960013","display_name":"Han Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125959385","display_name":"Long Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Long","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5125898381"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7121000289916992,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7121000289916992,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.17080000042915344,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12650","display_name":"Aesthetic Perception and Analysis","score":0.020600000396370888,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7832000255584717},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.7754999995231628},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.5450999736785889},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5282999873161316},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.3659999966621399},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.35670000314712524},{"id":"https://openalex.org/keywords/spatial-analysis","display_name":"Spatial analysis","score":0.3476000130176544}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7832000255584717},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.7754999995231628},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7638000249862671},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.5450999736785889},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5386999845504761},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5282999873161316},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.35670000314712524},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.3476000130176544},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3294000029563904},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.32359999418258667},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.28029999136924744},{"id":"https://openalex.org/C2776873942","wikidata":"https://www.wikidata.org/wiki/Q149013","display_name":"Spatial planning","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2624000012874603}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.11980","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.11980","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.11980","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.11980","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"diffusion":[1,82,89],"models":[2],"have":[3],"shown":[4],"exceptional":[5],"capabilities":[6,74,118],"in":[7,150],"aesthetic":[8],"image":[9,134,151],"synthesis,":[10],"they":[11,34],"often":[12],"struggle":[13],"with":[14,77],"complex":[15,142],"spatial":[16,46,116],"understanding":[17],"and":[18,137],"reasoning.":[19],"Existing":[20],"approaches":[21],"resort":[22],"to":[23,29,109,120],"Multimodal":[24],"Large":[25],"Language":[26],"Models":[27],"(MLLMs)":[28],"enhance":[30,87],"this":[31],"capability.":[32],"However,":[33],"either":[35],"incur":[36],"high":[37],"computational":[38],"costs":[39],"through":[40],"joint":[41],"training":[42,94],"or":[43],"suffer":[44],"from":[45],"information":[47],"loss":[48],"when":[49],"relying":[50],"solely":[51],"on":[52,96,133,141],"textual":[53],"prompts.":[54],"To":[55],"alleviate":[56],"these":[57],"limitations,":[58],"we":[59,85],"propose":[60],"a":[61,66],"Spatial":[62],"Chain-of-Thought":[63],"(SCoT)":[64],"framework,":[65],"plug-and-play":[67],"approach":[68],"that":[69,127],"effectively":[70],"bridges":[71],"the":[72,78,88,121],"reasoning":[73,143],"of":[75,81],"MLLMs":[76,106],"generative":[79],"power":[80],"models.":[83],"Specifically,":[84],"first":[86],"model's":[90],"layout":[91,112],"awareness":[92],"by":[93],"it":[95],"an":[97],"interleaved":[98],"text-coordinate":[99],"instruction":[100],"format.":[101],"We":[102],"then":[103],"leverage":[104],"state-of-the-art":[105,131],"as":[107],"planners":[108],"generate":[110],"comprehensive":[111],"plans,":[113],"transferring":[114],"their":[115],"planning":[117],"directly":[119],"generation":[122,135],"process.":[123],"Extensive":[124],"experiments":[125],"demonstrate":[126],"our":[128],"method":[129],"achieves":[130],"performance":[132],"benchmarks":[136],"significantly":[138],"outperforms":[139],"baselines":[140],"tasks,":[144],"while":[145],"also":[146],"showing":[147],"strong":[148],"efficacy":[149],"editing":[152],"scenarios.":[153]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-14T00:00:00"}
