{"id":"https://openalex.org/W7127644682","doi":"https://doi.org/10.48550/arxiv.2602.02995","title":"Agent Alpha: Tree Search Unifying Generation, Exploration and Evaluation for Computer-Use Agents","display_name":"Agent Alpha: Tree Search Unifying Generation, Exploration and Evaluation for Computer-Use Agents","publication_year":2026,"publication_date":"2026-02-03","ids":{"openalex":"https://openalex.org/W7127644682","doi":"https://doi.org/10.48550/arxiv.2602.02995"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.02995","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125065786","display_name":"Sizhe Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tang, Sizhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125012799","display_name":"Rongqian Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Rongqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125076825","display_name":"Tian Lan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lan, Tian","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5125065786"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.40880000591278076,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.40880000591278076,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.1265999972820282,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.08219999819993973,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monte-carlo-tree-search","display_name":"Monte Carlo tree search","score":0.701200008392334},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6294999718666077},{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.5527999997138977},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4957999885082245},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.4878999888896942},{"id":"https://openalex.org/keywords/search-tree","display_name":"Search tree","score":0.47279998660087585},{"id":"https://openalex.org/keywords/search-algorithm","display_name":"Search algorithm","score":0.36489999294281006},{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.33379998803138733},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.3206999897956848}],"concepts":[{"id":"https://openalex.org/C46149586","wikidata":"https://www.wikidata.org/wiki/Q11785332","display_name":"Monte Carlo tree search","level":3,"score":0.701200008392334},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6937999725341797},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6294999718666077},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.5527999997138977},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4957999885082245},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.4878999888896942},{"id":"https://openalex.org/C207024777","wikidata":"https://www.wikidata.org/wiki/Q621673","display_name":"Search tree","level":3,"score":0.47279998660087585},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4683000147342682},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4334000051021576},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37529999017715454},{"id":"https://openalex.org/C125583679","wikidata":"https://www.wikidata.org/wiki/Q755673","display_name":"Search algorithm","level":2,"score":0.36489999294281006},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.33379998803138733},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3206999897956848},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.3188000023365021},{"id":"https://openalex.org/C64943373","wikidata":"https://www.wikidata.org/wiki/Q2651003","display_name":"Alpha (finance)","level":4,"score":0.30230000615119934},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2865999937057495},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C831591","wikidata":"https://www.wikidata.org/wiki/Q59750","display_name":"Bidirectional search","level":5,"score":0.2743000090122223},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2727999985218048},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C93693863","wikidata":"https://www.wikidata.org/wiki/Q897659","display_name":"Branch and bound","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C155846161","wikidata":"https://www.wikidata.org/wiki/Q1143367","display_name":"Graphical model","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.26440000534057617},{"id":"https://openalex.org/C3020402766","wikidata":"https://www.wikidata.org/wiki/Q104376712","display_name":"Prior information","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C74072328","wikidata":"https://www.wikidata.org/wiki/Q1142726","display_name":"Intelligent agent","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.02995","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.02995","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.02995","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.02995","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"scaling":[1],"test-time":[2],"compute":[3],"through":[4,48],"trajectory-level":[5,132],"sampling":[6],"has":[7],"significantly":[8,130],"improved":[9],"Graphical":[10],"User":[11],"Interface":[12],"(GUI)":[13],"agents,":[14],"the":[15,21,27,63,72,117],"lack":[16],"of":[17,23,62,83,112,127],"regressive":[18],"ability":[19],"prevents":[20],"reuse":[22],"partial":[24],"successes":[25],"and":[26,46,86,100],"recovery":[28],"from":[29],"early":[30,81],"missteps.":[31],"In":[32],"this":[33],"paper,":[34],"we":[35],"introduce":[36],"Agent":[37,75,120],"Alpha,":[38],"a":[39,105,123],"unified":[40],"framework":[41],"that":[42],"synergizes":[43],"generation,":[44],"exploration,":[45],"evaluation":[47,94],"step-level":[49],"Monte":[50],"Carlo":[51],"Tree":[52],"Search":[53],"(MCTS).":[54],"It":[55],"enables":[56,77],"active":[57],"modeling":[58],"or":[59],"exploiting":[60],"structures":[61],"planning":[64],"space.":[65,109],"By":[66],"integrating":[67],"alpha-UCT":[68,113],"guided":[69],"search":[70,108],"into":[71],"interaction":[73],"loop,":[74],"Alpha":[76,121],"deliberate":[78],"planning,":[79],"facilitating":[80],"pruning":[82],"suboptimal":[84],"branches":[85],"efficient":[87],"prefix":[88],"reuse.":[89],"We":[90],"also":[91],"employ":[92],"comparison-driven":[93],"to":[95,103],"mitigate":[96],"absolute":[97],"scoring":[98],"biases":[99],"diversity-constrained":[101],"expansion":[102],"maintain":[104],"compact,":[106],"informative":[107],"Regret":[110],"bound":[111],"is":[114],"analyzed.":[115],"On":[116],"OSWorld":[118],"benchmark,":[119],"achieves":[122],"state-of-the-art":[124],"success":[125],"rate":[126],"$\\sim":[128],"77\\%$,":[129],"outperforming":[131],"baselines":[133],"under":[134],"equivalent":[135],"compute.":[136]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-06T00:00:00"}
