{"id":"https://openalex.org/W7162118722","doi":"https://doi.org/10.48550/arxiv.2605.22642","title":"Spreadsheet-RL: Advancing Large Language Model Agents on Realistic Spreadsheet Tasks via Reinforcement Learning","display_name":"Spreadsheet-RL: Advancing Large Language Model Agents on Realistic Spreadsheet Tasks via Reinforcement Learning","publication_year":2026,"publication_date":"2026-05-21","ids":{"openalex":"https://openalex.org/W7162118722","doi":"https://doi.org/10.48550/arxiv.2605.22642"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.22642","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22642","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.22642","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035190960","display_name":"Banghao Chi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chi, Banghao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136769847","display_name":"Yining Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Yining","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056474398","display_name":"Mingyuan Wu","orcid":"https://orcid.org/0009-0009-6146-2923"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Mingyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057511056","display_name":"Jingcheng Yang","orcid":"https://orcid.org/0000-0002-2550-3910"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jingcheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136770071","display_name":"Jize Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Jize","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010168076","display_name":"Zhaoheng Li","orcid":"https://orcid.org/0009-0002-0003-5062"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zhaoheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136731962","display_name":"Shengyi Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Shengyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136786525","display_name":"Minjia Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Minjia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074224097","display_name":"Klara Nahrstedt","orcid":"https://orcid.org/0000-0001-6813-3043"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nahrstedt, Klara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135527265","display_name":"Rui Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hou, Rui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136806667","display_name":"Xiangjun Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Xiangjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136745528","display_name":"Hanchao Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Hanchao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13197","display_name":"Spreadsheets and End-User Computing","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13197","display_name":"Spreadsheets and End-User Computing","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12607","display_name":"Personal Information Management and User Behavior","score":0.0020000000949949026,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.0007999999797903001,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6218000054359436},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.5633000135421753},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.459199994802475},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4465999901294708},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4390000104904175},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4147000014781952},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4047999978065491},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.3400999903678894}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8041999936103821},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6218000054359436},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5770000219345093},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.5633000135421753},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.459199994802475},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4465999901294708},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4390000104904175},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4147000014781952},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.40950000286102295},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4047999978065491},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.3384000062942505},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3215000033378601},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.3091999888420105},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.30059999227523804},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.28859999775886536},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2791999876499176},{"id":"https://openalex.org/C74072328","wikidata":"https://www.wikidata.org/wiki/Q1142726","display_name":"Intelligent agent","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C545109879","wikidata":"https://www.wikidata.org/wiki/Q5157412","display_name":"Computer Applications","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.2612000107765198},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.22642","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22642","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.22642","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22642","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Spreadsheet":[0,140,147],"systems":[1],"(e.g.,":[2],"Microsoft":[3,94],"Excel,":[4],"Google":[5],"Sheets)":[6],"play":[7],"a":[8,40,79,92,139,154,159,164],"central":[9],"role":[10],"in":[11,118,228,242],"modern":[12],"data-centric":[13],"workflows.":[14],"As":[15],"AI":[16,185],"agents":[17,47,90],"grow":[18],"increasingly":[19],"capable":[20],"of":[21,73,105],"automating":[22],"complex":[23],"tasks,":[24],"such":[25,120],"as":[26,39,112,114,121],"controlling":[27],"computers":[28],"and":[29,123,168,191,205,225,231],"generating":[30],"presentations,":[31],"building":[32],"an":[33,99],"AI-driven":[34],"spreadsheet":[35,46,62,89,174,193,229],"agent":[36],"has":[37,58],"emerged":[38],"promising":[41],"research":[42],"direction.":[43],"Most":[44],"existing":[45],"rely":[48],"on":[49,60,188,199,212],"specialized":[50,88],"prompting":[51],"over":[52],"general-purpose":[53],"LLMs;":[54],"while":[55],"this":[56],"design":[57],"potentials":[59],"simple":[61],"operations,":[63],"it":[64,195],"struggles":[65],"to":[66,86,203,210],"manage":[67],"the":[68,131],"complex,":[69],"multi-step":[70],"workflows":[71],"typical":[72],"real-world":[74,226],"applications.":[75],"We":[76],"introduce":[77],"Spreadsheet-RL,":[78],"reinforcement":[80],"learning":[81],"(RL)":[82],"fine-tuning":[83],"framework":[84],"designed":[85,143,170],"train":[87],"within":[91],"realistic":[93],"Excel":[95,151],"environment.":[96],"Spreadsheet-RL":[97,182],"features":[98],"automated":[100],"pipeline":[101],"for":[102,144,173,223,235],"scalable":[103],"collection":[104],"paired":[106],"start-goal":[107],"spreadsheets":[108],"from":[109,201,208],"online":[110],"forums,":[111],"well":[113],"domain-specific":[115,192],"evaluation":[116],"tasks":[117],"areas":[119],"finance":[122],"supply":[124],"chain":[125],"management,":[126],"which":[127],"we":[128,179],"compile":[129],"into":[130],"new":[132],"Domain-Spreadsheet":[133,215],"benchmark":[134],"dataset.":[135,216],"It":[136],"also":[137],"includes":[138],"Gym":[141,148],"environment":[142],"multi-turn":[145],"RL:":[146],"exposes":[149],"extensive":[150],"functionality":[152],"through":[153],"Python":[155],"sandbox,":[156],"along":[157],"with":[158,239],"refined":[160],"harness":[161],"that":[162,181],"incorporates":[163],"comprehensive":[165,177],"tool":[166],"set":[167],"carefully":[169],"tool-routing":[171],"rules":[172],"tasks.":[175],"Through":[176],"experiments,":[178],"show":[180],"substantially":[183],"enhances":[184],"agent's":[186],"performance":[187],"both":[189],"general":[190],"tasks:":[194],"improves":[196],"Qwen3-4B-Thinking-2507's":[197],"Pass@1":[198,207],"SpreadsheetBench":[200],"12.0%":[202],"23.4%,":[204],"raises":[206],"8.4%":[209],"17.2%":[211],"our":[213],"curated":[214],"These":[217],"results":[218],"highlight":[219],"Spreadsheet-RL's":[220],"strong":[221],"potential":[222],"generalization":[224],"adoption":[227],"automation,":[230],"broadly,":[232],"its":[233],"promise":[234],"advancing":[236],"LLM-based":[237],"interactions":[238],"data":[240],"interfaces":[241],"everyday":[243],"work.":[244]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-23T00:00:00"}
