{"id":"https://openalex.org/W7077892335","doi":"https://doi.org/10.48550/arxiv.2508.17180","title":"MaRVL-QA: A Benchmark for Mathematical Reasoning over Visual Landscapes","display_name":"MaRVL-QA: A Benchmark for Mathematical Reasoning over Visual Landscapes","publication_year":2025,"publication_date":"2025-08-24","ids":{"openalex":"https://openalex.org/W7077892335","doi":"https://doi.org/10.48550/arxiv.2508.17180"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2508.17180","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.17180","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2508.17180","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Pande, Nilay","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Pande, Nilay","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yerramilli, Sahiti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yerramilli, Sahiti","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tamarapalli, Jayant Sravan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tamarapalli, Jayant Sravan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Grover, Rynaa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grover, Rynaa","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.6733999848365784,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.6733999848365784,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13067","display_name":"Geological Modeling and Analysis","score":0.023800000548362732,"subfield":{"id":"https://openalex.org/subfields/1906","display_name":"Geochemistry and Petrology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14311","display_name":"Electrical and Electromagnetic Research","score":0.018400000408291817,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6546000242233276},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.6330999732017517},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.5544999837875366},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.47749999165534973},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.46720001101493835},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.46129998564720154},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.44940000772476196},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.43470001220703125},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4124999940395355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6814000010490417},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6546000242233276},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.6330999732017517},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6262999773025513},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.5544999837875366},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.47749999165534973},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.46720001101493835},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.46129998564720154},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.44940000772476196},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.43470001220703125},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4124999940395355},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.40639999508857727},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.38589999079704285},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3808000087738037},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.37770000100135803},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3711000084877014},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3237999975681305},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.32170000672340393},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.2822999954223633},{"id":"https://openalex.org/C76969082","wikidata":"https://www.wikidata.org/wiki/Q486902","display_name":"Mathematical model","level":2,"score":0.2694000005722046},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.26600000262260437},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.2581000030040741},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2508.17180","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.17180","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2508.17180","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.17180","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4839306175708771,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0],"key":[1],"frontier":[2],"for":[3,37,140],"Multimodal":[4],"Large":[5],"Language":[6],"Models":[7],"(MLLMs)":[8],"is":[9],"the":[10,43,48,141,152],"ability":[11],"to":[12,73,126,144],"perform":[13],"deep":[14],"mathematical":[15],"and":[16,89,95,150],"spatial":[17,132],"reasoning":[18,46,78,159],"directly":[19],"from":[20,47,103],"images,":[21],"moving":[22],"beyond":[23],"their":[24],"established":[25],"success":[26],"in":[27,52],"semantic":[28,49],"description.":[29],"Mathematical":[30],"surface":[31],"plots":[32],"provide":[33],"a":[34,69,104,136],"rigorous":[35,110],"testbed":[36],"this":[38,59],"capability,":[39],"as":[40],"they":[41],"isolate":[42],"task":[44],"of":[45,107,130,154],"noise":[50],"common":[51],"natural":[53],"images.":[54],"To":[55],"measure":[56,145],"progress":[57],"on":[58,115],"frontier,":[60],"we":[61],"introduce":[62],"MaRVL-QA":[63,116,134],"(Mathematical":[64],"Reasoning":[65],"over":[66],"Visual":[67],"Landscapes),":[68],"new":[70,138],"benchmark":[71,81],"designed":[72],"quantitatively":[74],"evaluate":[75],"these":[76],"core":[77],"skills.":[79],"The":[80],"comprises":[82],"two":[83],"novel":[84],"tasks:":[85],"Topological":[86],"Counting,":[87],"identifying":[88],"enumerating":[90],"features":[91],"like":[92],"local":[93],"maxima;":[94],"Transformation":[96],"Recognition,":[97],"recognizing":[98],"applied":[99],"geometric":[100],"transformations.":[101],"Generated":[102],"curated":[105],"library":[106],"functions":[108],"with":[109,156],"ambiguity":[111],"filtering,":[112],"our":[113],"evaluation":[114],"reveals":[117],"that":[118],"even":[119],"state-of-the-art":[120],"MLLMs":[121,155],"struggle":[122],"significantly,":[123],"often":[124],"resorting":[125],"superficial":[127],"heuristics":[128],"instead":[129],"robust":[131],"reasoning.":[133],"provides":[135],"challenging":[137],"tool":[139],"research":[142],"community":[143],"progress,":[146],"expose":[147],"model":[148],"limitations,":[149],"guide":[151],"development":[153],"more":[157],"profound":[158],"abilities.":[160]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
