{"id":"https://openalex.org/W7162393045","doi":"https://doi.org/10.48550/arxiv.2605.25624","title":"CUA-Gym: Scaling Verifiable Training Environments and Tasks for Computer-Use Agents","display_name":"CUA-Gym: Scaling Verifiable Training Environments and Tasks for Computer-Use Agents","publication_year":2026,"publication_date":"2026-05-25","ids":{"openalex":"https://openalex.org/W7162393045","doi":"https://doi.org/10.48550/arxiv.2605.25624"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.25624","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25624","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.25624","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137054163","display_name":"Bowen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Bowen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101404592","display_name":"Dunjie Lu","orcid":"https://orcid.org/0009-0001-1447-2610"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Dunjie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137079785","display_name":"Junli Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Junli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102781208","display_name":"Tianyi Bai","orcid":"https://orcid.org/0009-0009-5057-7100"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Tianyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137011466","display_name":"Shixuan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Shixuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137065337","display_name":"Zhipeng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhipeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137044582","display_name":"Haiquan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Haiquan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137074700","display_name":"Hao Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137063651","display_name":"Tianbao Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Tianbao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137005404","display_name":"Shuai Bai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Shuai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137073692","display_name":"Dayiheng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Dayiheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073347572","display_name":"Que Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Que","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137047101","display_name":"Junyang Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Junyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137013003","display_name":"Tao Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Tao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.6057999730110168,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.6057999730110168,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.07360000163316727,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.04619999974966049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.6225000023841858},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5767999887466431},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.574400007724762},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.572700023651123},{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.4936999976634979},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.46219998598098755},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4512999951839447},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.3935999870300293},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.36000001430511475}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8287000060081482},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.6225000023841858},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5767999887466431},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.574400007724762},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.572700023651123},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.4936999976634979},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.46219998598098755},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45730000734329224},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4512999951839447},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41819998621940613},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.3935999870300293},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.36000001430511475},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.34850001335144043},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3458999991416931},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3458000123500824},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.34299999475479126},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.325300008058548},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.32359999418258667},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.29899999499320984},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2903999984264374},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.28850001096725464},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27889999747276306},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.2734000086784363},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.2639000117778778},{"id":"https://openalex.org/C2779038628","wikidata":"https://www.wikidata.org/wiki/Q7248497","display_name":"Programming by demonstration","level":3,"score":0.26269999146461487},{"id":"https://openalex.org/C74072328","wikidata":"https://www.wikidata.org/wiki/Q1142726","display_name":"Intelligent agent","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.251800000667572},{"id":"https://openalex.org/C114073186","wikidata":"https://www.wikidata.org/wiki/Q2631895","display_name":"Automated planning and scheduling","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.25624","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25624","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.25624","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25624","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.5036563873291016}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"with":[2,35,195,216],"verifiable":[3,50],"rewards":[4],"(RLVR)":[5],"has":[6,25],"driven":[7],"breakthroughs":[8],"in":[9,163,191,220],"domains":[10],"such":[11,39],"as":[12],"math,":[13],"tool-use,":[14],"and":[15,49,63,84,94,98,133,201,205,224,253],"software":[16],"engineering,":[17],"yet":[18],"its":[19],"extension":[20],"to":[21],"computer-use":[22],"agents":[23],"(CUAs)":[24],"been":[26],"bottlenecked":[27],"by":[28,174],"the":[29,92,104,108,115,139,145,168,233,240,246],"scarcity":[30,146],"of":[31,147,157,170,184],"scalable":[32,76],"training":[33,148,188,241],"data":[34,40,173,222],"deterministic":[36],"rewards.":[37],"Constructing":[38],"for":[41],"CUAs":[42,212],"requires":[43],"consistent":[44],"task":[45,80,109],"instruction,":[46],"executable":[47],"environment,":[48],"reward.":[51],"However,":[52],"hand-curated":[53],"benchmarks":[54],"achieve":[55,203],"high":[56],"reward":[57,85,105],"fidelity":[58],"but":[59,68],"cover":[60],"few":[61],"applications":[62,161],"LLM-as-judge-based":[64],"datasets":[65],"scale":[66,169],"broadly":[67],"lack":[69],"reliable":[70],"verification.":[71],"We":[72,243],"present":[73],"CUA-Gym,":[74,181,198],"a":[75,88,99,126,154,182],"pipeline":[77],"that":[78],"co-generates":[79],"instructions,":[81],"environment":[82,96,225],"states,":[83,97],"functions.":[86],"Concretely,":[87],"Generator":[89],"agent":[90,102,113,134],"constructs":[91],"initial":[93],"golden":[95],"separate":[100],"Discriminator":[101],"writes":[103],"function":[106],"from":[107],"specification.":[110],"An":[111],"orchestrator":[112],"drives":[114],"two":[116],"through":[117],"iterative":[118],"rounds":[119],"upon":[120],"execution.":[121],"Generated":[122],"tuples":[123,189],"then":[124],"pass":[125],"final":[127],"filter":[128],"combining":[129],"LLM":[130],"majority":[131],"voting":[132],"rollouts,":[135],"ensuring":[136],"quality":[137],"beyond":[138,239],"per-task":[140],"adversarial":[141],"loop.":[142],"To":[143],"address":[144],"environments,":[149,252],"we":[150,179],"further":[151],"synthesize":[152],"CUA-Gym-Hub,":[153],"broad":[155],"suite":[156],"high-fidelity":[158],"mock":[159],"web":[160],"grounded":[162,190],"real-world":[164],"software-use":[165],"distributions,":[166],"expanding":[167],"CUA":[171],"RLVR":[172,187],"magnitude.":[175],"Using":[176],"this":[177],"pipeline,":[178,249],"construct":[180],"dataset":[183],"32,112":[185],"verified":[186],"110":[192],"environments.":[193,242],"Trained":[194],"GSPO":[196],"on":[197,207,232],"our":[199],"CUA-Gym-A3B":[200],"CUA-Gym-A17B":[202],"62.1%":[204],"72.6%":[206],"OSWorld-Verified,":[208],"outperforming":[209],"prior":[210],"open-source":[211,245],"at":[213],"comparable":[214],"scales,":[215],"performance":[217],"scaling":[218],"smoothly":[219],"both":[221],"volume":[223],"diversity.":[226],"The":[227],"same":[228],"checkpoints":[229],"also":[230],"improve":[231],"held-out":[234],"WebArena":[235],"benchmark,":[236],"indicating":[237],"transfer":[238],"will":[244],"full":[247],"synthesis":[248],"dataset,":[250],"CUA-Gym-Hub":[251],"models.":[254]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-27T00:00:00"}
