{"id":"https://openalex.org/W7161279268","doi":"https://doi.org/10.48550/arxiv.2605.14406","title":"GeoViSTA: Geospatial Vision-Tabular Transformer for Multimodal Environment Representation","display_name":"GeoViSTA: Geospatial Vision-Tabular Transformer for Multimodal Environment Representation","publication_year":2026,"publication_date":"2026-05-14","ids":{"openalex":"https://openalex.org/W7161279268","doi":"https://doi.org/10.48550/arxiv.2605.14406"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.14406","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14406","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.14406","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136220137","display_name":"Yuhao Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yuhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136194612","display_name":"Sadeer Al-Kindi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Al-Kindi, Sadeer","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135829173","display_name":"Ashok Veeraraghavan","orcid":"https://orcid.org/0000-0001-5043-7460"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Veeraraghavan, Ashok","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136203839","display_name":"Guha Balakrishnan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Balakrishnan, Guha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.13379999995231628,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.13379999995231628,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12597","display_name":"Fire Detection and Safety Systems","score":0.12129999697208405,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.0544000007212162,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/geospatial-analysis","display_name":"Geospatial analysis","score":0.8513000011444092},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5275999903678894},{"id":"https://openalex.org/keywords/forcing","display_name":"Forcing (mathematics)","score":0.5175999999046326},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4787999987602234},{"id":"https://openalex.org/keywords/spatial-contextual-awareness","display_name":"Spatial contextual awareness","score":0.47049999237060547},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4120999872684479},{"id":"https://openalex.org/keywords/earth-observation","display_name":"Earth observation","score":0.38510000705718994},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.36570000648498535}],"concepts":[{"id":"https://openalex.org/C9770341","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Geospatial analysis","level":2,"score":0.8513000011444092},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7249000072479248},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5275999903678894},{"id":"https://openalex.org/C197115733","wikidata":"https://www.wikidata.org/wiki/Q1003136","display_name":"Forcing (mathematics)","level":2,"score":0.5175999999046326},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4787999987602234},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.47049999237060547},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45320001244544983},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4120999872684479},{"id":"https://openalex.org/C39399123","wikidata":"https://www.wikidata.org/wiki/Q1348989","display_name":"Earth observation","level":3,"score":0.38510000705718994},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3707999885082245},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.36570000648498535},{"id":"https://openalex.org/C41856607","wikidata":"https://www.wikidata.org/wiki/Q483130","display_name":"Geographic information system","level":2,"score":0.3614000082015991},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C49261128","wikidata":"https://www.wikidata.org/wiki/Q1132455","display_name":"Hazard","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.30169999599456787},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2678999900817871},{"id":"https://openalex.org/C202269582","wikidata":"https://www.wikidata.org/wiki/Q2644277","display_name":"Complementarity (molecular biology)","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.25270000100135803},{"id":"https://openalex.org/C2778102629","wikidata":"https://www.wikidata.org/wiki/Q725252","display_name":"Satellite imagery","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.14406","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14406","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.14406","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14406","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5612183213233948,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large-scale":[0],"pretraining":[1],"on":[2,144],"Earth":[3],"observation":[4],"imagery":[5,79],"has":[6],"yielded":[7],"powerful":[8],"representations":[9,177],"of":[10],"the":[11,26,43,167],"natural":[12],"and":[13,56,80,90,126,133,154],"built":[14],"environment.":[15],"However,":[16],"most":[17],"existing":[18],"geospatial":[19,74,180],"foundation":[20],"models":[21],"do":[22],"not":[23],"directly":[24],"model":[25],"structured":[27,171],"socioeconomic":[28,172],"covariates":[29],"typically":[30],"stored":[31],"in":[32,150],"tabular":[33,81,127],"form.":[34],"This":[35],"modality":[36],"gap":[37],"limits":[38],"their":[39],"ability":[40],"to":[41,87,121],"capture":[42],"complete":[44],"total":[45],"environment,":[46],"which":[47],"is":[48],"critical":[49],"for":[50,178],"reasoning":[51],"about":[52],"complex":[53],"environmental,":[54],"social,":[55],"health-related":[57],"outcomes.":[58],"In":[59],"this":[60],"work,":[61],"we":[62],"propose":[63],"GeoViSTA":[64,83,112],"(Geospatial":[65],"Vision-Tabular":[66],"Transformer),":[67],"a":[68,97,114],"vision-tabular":[69],"architecture":[70],"that":[71,101,164],"learns":[72],"unified":[73,138],"embeddings":[75,139],"from":[76],"co-registered":[77],"gridded":[78],"data.":[82],"utilizes":[84],"bilateral":[85],"cross-attention":[86],"exchange":[88],"spatial":[89,131],"semantic":[91],"information":[92],"across":[93,158],"modalities,":[94],"guided":[95],"by":[96],"geography-aware":[98],"attention":[99],"mechanism":[100],"aligns":[102],"continuous":[103],"image":[104,124],"patches":[105,125],"with":[106,113],"irregular":[107],"census-tract":[108],"tokens.":[109],"We":[110],"train":[111],"self-supervised":[115],"joint":[116],"masked-autoencoding":[117],"objective,":[118],"forcing":[119],"it":[120],"recover":[122],"missing":[123],"rows":[128],"using":[129],"local":[130],"context":[132,173],"cross-modal":[134],"cues.":[135],"Empirically,":[136],"GeoViSTA's":[137],"improve":[140],"linear":[141],"probing":[142],"performance":[143],"high-impact":[145],"downstream":[146],"tasks,":[147],"outperforming":[148],"baselines":[149],"predicting":[151],"disease-specific":[152],"mortality":[153],"fire":[155],"hazard":[156],"frequency":[157],"held-out":[159],"regions.":[160],"These":[161],"results":[162],"demonstrate":[163],"jointly":[165],"modeling":[166],"physical":[168],"environment":[169],"alongside":[170],"yields":[174],"highly":[175],"transferable":[176],"holistic":[179],"inference.":[181]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-16T00:00:00"}
