{"id":"https://openalex.org/W7147583774","doi":"https://doi.org/10.48550/arxiv.2603.29139","title":"SciVisAgentBench: A Benchmark for Evaluating Scientific Data Analysis and Visualization Agents","display_name":"SciVisAgentBench: A Benchmark for Evaluating Scientific Data Analysis and Visualization Agents","publication_year":2026,"publication_date":"2026-03-31","ids":{"openalex":"https://openalex.org/W7147583774","doi":"https://doi.org/10.48550/arxiv.2603.29139"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.29139","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29139","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.29139","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109649214","display_name":"Kunpeng Ai","orcid":"https://orcid.org/0009-0005-7171-6529"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ai, Kuangshi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078150260","display_name":"Haichao Miao","orcid":"https://orcid.org/0000-0001-6580-2918"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miao, Haichao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130240884","display_name":"Kaiyuan Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Kaiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116357976","display_name":"Nathaniel Gorski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gorski, Nathaniel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101961720","display_name":"Jianxin Sun","orcid":"https://orcid.org/0009-0007-0003-3821"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Jianxin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057655212","display_name":"Guoxi Liu","orcid":"https://orcid.org/0000-0002-8164-7185"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Guoxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001437954","display_name":"Helgi I. Ing\u00f3lfsson","orcid":"https://orcid.org/0000-0002-7613-9143"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ingolfsson, Helgi I.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132697487","display_name":"David Lenz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lenz, David","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132561579","display_name":"Hanqi Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Hanqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132551043","display_name":"Hongfeng Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Hongfeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132730194","display_name":"Teja Leburu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leburu, Teja","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132595208","display_name":"Michael Molash","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Molash, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132572045","display_name":"Bei Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Bei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132642557","display_name":"Tom Peterka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peterka, Tom","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130219756","display_name":"Chaoli Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Chaoli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132578320","display_name":"Shusen Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Shusen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":16,"corresponding_author_ids":["https://openalex.org/A5109649214"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.22300000488758087,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.22300000488758087,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.2003999948501587,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.1324000060558319,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8248999714851379},{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.7390999794006348},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.7024999856948853},{"id":"https://openalex.org/keywords/data-visualization","display_name":"Data visualization","score":0.46970000863075256},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.44130000472068787},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4074000120162964},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.35249999165534973}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8248999714851379},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7732999920845032},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.7390999794006348},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.7024999856948853},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5250999927520752},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.46970000863075256},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.44130000472068787},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4074000120162964},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3785000145435333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35830000042915344},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.35249999165534973},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3409000039100647},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.290800005197525},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.28450000286102295},{"id":"https://openalex.org/C14669888","wikidata":"https://www.wikidata.org/wiki/Q4014850","display_name":"Creative visualization","level":3,"score":0.27459999918937683},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C92446256","wikidata":"https://www.wikidata.org/wiki/Q3306762","display_name":"Data validation","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C2984917352","wikidata":"https://www.wikidata.org/wiki/Q12772819","display_name":"Scientific discovery","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.29139","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29139","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.29139","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29139","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,39,64,174],"large":[3],"language":[4,14],"models":[5],"(LLMs)":[6],"have":[7],"enabled":[8],"agentic":[9,175],"systems":[10],"that":[11,101],"translate":[12],"natural":[13],"intent":[15],"into":[16],"executable":[17],"scientific":[18,54],"visualization":[19,58,78],"(SciVis)":[20],"tasks.":[21],"Despite":[22],"rapid":[23],"progress,":[24],"the":[25,130],"community":[26],"lacks":[27],"a":[28,47,65,96,121,161],"principled":[29],"and":[30,49,57,77,115,134,145,153,171],"reproducible":[31],"benchmark":[32,51,61,163,178],"for":[33,52],"evaluating":[34,53],"these":[35],"emerging":[36],"SciVis":[37,88,126,143],"agents":[38,144,148],"realistic,":[40],"multi-step":[41],"analysis":[42,56],"settings.":[43],"We":[44,118],"present":[45],"SciVisAgentBench,":[46],"comprehensive":[48],"extensible":[50],"data":[55,73],"agents.":[59],"Our":[60],"is":[62,158,179],"grounded":[63],"structured":[66],"taxonomy":[67],"spanning":[68],"four":[69],"dimensions:":[70],"application":[71],"domain,":[72],"type,":[74],"complexity":[75],"level,":[76],"operation.":[79],"It":[80],"currently":[81],"comprises":[82],"108":[83],"expert-crafted":[84],"cases":[85],"covering":[86],"diverse":[87],"scenarios.":[89],"To":[90],"enable":[91],"reliable":[92],"assessment,":[93],"we":[94,140],"introduce":[95],"multimodal":[97],"outcome-centric":[98],"evaluation":[99],"pipeline":[100],"combines":[102],"LLM-based":[103],"judging":[104],"with":[105,124],"deterministic":[106],"evaluators,":[107],"including":[108],"image-based":[109],"metrics,":[110],"code":[111],"checkers,":[112],"rule-based":[113],"verifiers,":[114],"case-specific":[116],"evaluators.":[117],"also":[119],"conduct":[120],"validity":[122],"study":[123],"12":[125],"experts":[127],"to":[128,149,164],"examine":[129],"agreement":[131],"between":[132],"human":[133],"LLM":[135],"judges.":[136],"Using":[137],"this":[138],"framework,":[139],"evaluate":[141],"representative":[142],"general-purpose":[146],"coding":[147],"establish":[150],"initial":[151],"baselines":[152],"reveal":[154],"capability":[155],"gaps.":[156],"SciVisAgentBench":[157],"designed":[159],"as":[160],"living":[162],"support":[165],"systematic":[166],"comparison,":[167],"diagnose":[168],"failure":[169],"modes,":[170],"drive":[172],"progress":[173],"SciVis.":[176],"The":[177],"available":[180],"at":[181],"https://scivisagentbench.github.io/.":[182]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-02T00:00:00"}
