{"id":"https://openalex.org/W7130585528","doi":"https://doi.org/10.48550/arxiv.2602.15918","title":"EarthSpatialBench: Benchmarking Spatial Reasoning Capabilities of Multimodal LLMs on Earth Imagery","display_name":"EarthSpatialBench: Benchmarking Spatial Reasoning Capabilities of Multimodal LLMs on Earth Imagery","publication_year":2026,"publication_date":"2026-02-17","ids":{"openalex":"https://openalex.org/W7130585528","doi":"https://doi.org/10.48550/arxiv.2602.15918"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.15918","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048168090","display_name":"Zelin Xu","orcid":"https://orcid.org/0009-0004-4419-3155"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Zelin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126400118","display_name":"Yupu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yupu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062626843","display_name":"Saugat Adhikari","orcid":"https://orcid.org/0000-0002-7846-2200"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adhikari, Saugat","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126392015","display_name":"Md. Saiful Islam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Islam, Saiful","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008753758","display_name":"Tingsong Xiao","orcid":"https://orcid.org/0000-0002-1466-9421"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Tingsong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122394615","display_name":"Zibo Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zibo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122399715","display_name":"Shigang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Shigang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070582509","display_name":"Da Yan","orcid":"https://orcid.org/0000-0002-4653-0408"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Da","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126394705","display_name":"Zhe Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Zhe","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5048168090"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6927000284194946,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6927000284194946,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.19750000536441803,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11904","display_name":"Spatial Cognition and Navigation","score":0.05380000174045563,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.6757000088691711},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.6610000133514404},{"id":"https://openalex.org/keywords/qualitative-reasoning","display_name":"Qualitative reasoning","score":0.6283000111579895},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5353000164031982},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.512499988079071},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.48829999566078186},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.48019999265670776},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4794999957084656},{"id":"https://openalex.org/keywords/spatial-relation","display_name":"Spatial relation","score":0.4602000117301941},{"id":"https://openalex.org/keywords/geospatial-analysis","display_name":"Geospatial analysis","score":0.43959999084472656}],"concepts":[{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.6757000088691711},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.6610000133514404},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.6283000111579895},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6263999938964844},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6141999959945679},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5353000164031982},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.512499988079071},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.48829999566078186},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.48019999265670776},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4794999957084656},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.4602000117301941},{"id":"https://openalex.org/C9770341","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Geospatial analysis","level":2,"score":0.43959999084472656},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.41110000014305115},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.4059999883174896},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3874000012874603},{"id":"https://openalex.org/C2777371692","wikidata":"https://www.wikidata.org/wiki/Q2178611","display_name":"Spatial cognition","level":3,"score":0.3758000135421753},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.367900013923645},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.3508000075817108},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3418999910354614},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.3280999958515167},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.3276999890804291},{"id":"https://openalex.org/C153938966","wikidata":"https://www.wikidata.org/wiki/Q3348148","display_name":"Object-based spatial database","level":4,"score":0.321399986743927},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.3082999885082245},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.2888999879360199},{"id":"https://openalex.org/C199538142","wikidata":"https://www.wikidata.org/wiki/Q573980","display_name":"Topological map","level":4,"score":0.2856000065803528},{"id":"https://openalex.org/C203689450","wikidata":"https://www.wikidata.org/wiki/Q2302053","display_name":"Spatial database","level":3,"score":0.27959999442100525},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.26899999380111694},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C194226119","wikidata":"https://www.wikidata.org/wiki/Q161779","display_name":"Spatial reference system","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C39399123","wikidata":"https://www.wikidata.org/wiki/Q1348989","display_name":"Earth observation","level":3,"score":0.25949999690055847},{"id":"https://openalex.org/C75145180","wikidata":"https://www.wikidata.org/wiki/Q772007","display_name":"Georeference","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.15918","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.15918","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15918","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.15918","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Benchmarking":[0],"spatial":[1,36,86,92,131,152,206],"reasoning":[2,37,55,132,150,207],"in":[3,13,50,133,204],"multimodal":[4],"large":[5],"language":[6],"models":[7,200],"(MLLMs)":[8],"has":[9,41],"attracted":[10],"growing":[11],"interest":[12],"computer":[14],"vision":[15],"due":[16],"to":[17,201],"its":[18],"importance":[19],"for":[20,79,103,129],"embodied":[21],"AI":[22],"and":[23,53,59,66,75,90,106,112,148,154,165,170,180,189,198],"other":[24],"agentic":[25],"systems":[26],"that":[27],"require":[28],"precise":[29],"interaction":[30],"with":[31],"the":[32,205],"physical":[33],"world.":[34],"However,":[35],"on":[38,84,135,195],"Earth":[39,80,136],"imagery":[40,81],"lagged":[42],"behind,":[43],"as":[44],"it":[45],"uniquely":[46],"involves":[47],"grounding":[48],"objects":[49],"georeferenced":[51],"images":[52],"quantitatively":[54],"about":[56,151],"distances,":[57],"directions,":[58],"topological":[60,110,158],"relations":[61,93],"using":[62],"both":[63,196],"visual":[64,178],"cues":[65],"vector":[67],"geometry":[68,182],"coordinates":[69],"(e.g.,":[70,94],"2D":[71,85,185],"bounding":[72,117,186],"boxes,":[73,187],"polylines,":[74,188],"polygons).":[76],"Existing":[77],"benchmarks":[78],"primarily":[82],"focus":[83],"grounding,":[87],"image":[88],"captioning,":[89],"coarse":[91],"simple":[95],"directional":[96],"or":[97],"proximity":[98],"cues).":[99],"They":[100],"lack":[101],"support":[102],"quantitative":[104,149],"direction":[105],"distance":[107,153],"reasoning,":[108],"systematic":[109,157],"relations,":[111],"complex":[113],"object":[114,172],"geometries":[115],"beyond":[116],"boxes.":[118],"To":[119],"fill":[120],"this":[121],"gap,":[122],"we":[123],"propose":[124],"\\textbf{EarthSpatialBench},":[125],"a":[126],"comprehensive":[127],"benchmark":[128,139],"evaluating":[130],"MLLMs":[134],"imagery.":[137],"The":[138],"contains":[140],"over":[141],"325K":[142],"question-answer":[143],"pairs":[144],"spanning:":[145],"(1)":[146],"qualitative":[147],"direction;":[155],"(2)":[156],"relations;":[159],"(3)":[160],"single-object":[161],"queries,":[162,164],"object-pair":[163],"compositional":[166],"aggregate":[167],"group":[168],"queries;":[169],"(4)":[171],"references":[173],"expressed":[174],"via":[175],"textual":[176],"descriptions,":[177],"overlays,":[179],"explicit":[181],"coordinates,":[183],"including":[184],"polygons.":[190],"We":[191],"conducted":[192],"extensive":[193],"experiments":[194],"open-source":[197],"proprietary":[199],"identify":[202],"limitations":[203],"of":[208],"MLLMs.":[209]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-20T00:00:00"}
