{"id":"https://openalex.org/W7125893560","doi":"https://doi.org/10.48550/arxiv.2601.19099","title":"m2sv: A Scalable Benchmark for Map-to-Street-View Spatial Reasoning","display_name":"m2sv: A Scalable Benchmark for Map-to-Street-View Spatial Reasoning","publication_year":2026,"publication_date":"2026-01-27","ids":{"openalex":"https://openalex.org/W7125893560","doi":"https://doi.org/10.48550/arxiv.2601.19099"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2601.19099","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090705633","display_name":"Yosub Shin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shin, Yosub","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123315270","display_name":"Michael Buriek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Buriek, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5041560478","display_name":"Igor Molybog","orcid":"https://orcid.org/0000-0001-7251-3532"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Molybog, Igor","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9735999703407288,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9735999703407288,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.006099999882280827,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.00430000014603138,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8102999925613403},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6789000034332275},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5680000185966492},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.517300009727478},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.5169000029563904},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4514000117778778},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.43959999084472656},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.39640000462532043}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8102999925613403},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7164000272750854},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6789000034332275},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.632099986076355},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5680000185966492},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5526999831199646},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.517300009727478},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.5169000029563904},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4514000117778778},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.43959999084472656},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.39640000462532043},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.28360000252723694},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C16345878","wikidata":"https://www.wikidata.org/wiki/Q107472979","display_name":"Orientation (vector space)","level":2,"score":0.25459998846054077},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.25459998846054077}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2601.19099","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2601.19099","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.19099","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2601.19099","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision--language":[0],"models":[1,38],"(VLMs)":[2],"achieve":[3],"strong":[4,85],"performance":[5,86],"on":[6,13,87,99,165],"many":[7],"multimodal":[8,89],"benchmarks":[9],"but":[10],"remain":[11],"brittle":[12],"spatial":[14,34,167],"reasoning":[15,35,79,131,160,168],"tasks":[16],"that":[17,36],"require":[18],"aligning":[19,45],"abstract":[20],"overhead":[21,48],"representations":[22],"with":[23,50,68,72],"egocentric":[24],"views.":[25],"We":[26,61],"introduce":[27],"m2sv,":[28,100],"a":[29,46,51,64,74],"scalable":[30],"benchmark":[31,67],"for":[32,81],"map-to-street-view":[33,130],"asks":[37],"to":[39],"infer":[40],"camera":[41],"viewing":[42],"direction":[43],"by":[44],"north-up":[47],"map":[49],"Street":[52],"View":[53],"image":[54],"captured":[55],"at":[56],"the":[57,91,103],"same":[58],"real-world":[59],"intersection.":[60],"release":[62],"m2sv-20k,":[63],"geographically":[65],"diverse":[66],"controlled":[69],"ambiguity,":[70],"along":[71],"m2sv-sft-11k,":[73],"curated":[75],"set":[76],"of":[77,106,145],"structured":[78],"traces":[80],"supervised":[82,109],"fine-tuning.":[83],"Despite":[84],"existing":[88],"benchmarks,":[90],"best":[92],"evaluated":[93],"VLM":[94],"achieves":[95],"only":[96],"65.2%":[97],"accuracy":[98],"far":[101],"below":[102],"human":[104,137],"baseline":[105],"95%.":[107],"While":[108],"fine-tuning":[110],"and":[111,136,139,159],"reinforcement":[112],"learning":[113],"yield":[114],"consistent":[115],"gains,":[116],"cross-benchmark":[117],"evaluations":[118],"reveal":[119],"limited":[120],"transfer.":[121],"Beyond":[122],"aggregate":[123],"accuracy,":[124],"we":[125],"systematically":[126],"analyze":[127],"difficulty":[128],"in":[129,154],"using":[132],"both":[133],"structural":[134],"signals":[135],"effort,":[138],"conduct":[140],"an":[141],"extensive":[142],"failure":[143],"analysis":[144],"adapted":[146],"open":[147],"models.":[148],"Our":[149],"findings":[150],"highlight":[151],"persistent":[152],"gaps":[153],"geometric":[155],"alignment,":[156],"evidence":[157],"aggregation,":[158],"consistency,":[161],"motivating":[162],"future":[163],"work":[164],"grounded":[166],"across":[169],"viewpoints.":[170]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-29T00:00:00"}
