{"id":"https://openalex.org/W7161587772","doi":"https://doi.org/10.48550/arxiv.2605.15777","title":"SaaS-Bench: Can Computer-Use Agents Leverage Real-World SaaS to Solve Professional Workflows?","display_name":"SaaS-Bench: Can Computer-Use Agents Leverage Real-World SaaS to Solve Professional Workflows?","publication_year":2026,"publication_date":"2026-05-15","ids":{"openalex":"https://openalex.org/W7161587772","doi":"https://doi.org/10.48550/arxiv.2605.15777"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.15777","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15777","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.15777","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020080521","display_name":"Kean Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Kean","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136449203","display_name":"Zihang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zihang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136403264","display_name":"Tianyi Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Tianyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128047408","display_name":"Zengji Tu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tu, Zengji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136389016","display_name":"Jialong Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Jialong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136419343","display_name":"Xinbo Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Xinbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133609147","display_name":"Qingyao Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Qingyao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136367005","display_name":"Ruoyu Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Ruoyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136355786","display_name":"Weichu Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Weichu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136375358","display_name":"Ming Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Ming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136409463","display_name":"Jason Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Jason","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136415042","display_name":"Michael Heinrich","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heinrich, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136397547","display_name":"Elvis Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Elvis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136412744","display_name":"Liang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Liang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136395227","display_name":"Kuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Kuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136384910","display_name":"Baobao Chang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Baobao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":16,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.1509999930858612,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.1509999930858612,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.06449999660253525,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.0640999972820282,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7160000205039978},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.476500004529953},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4050999879837036},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.37689998745918274},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.37119999527931213},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3709000051021576},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.36730000376701355},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.36059999465942383},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.3582000136375427}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7857000231742859},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7160000205039978},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.506600022315979},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.476500004529953},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4050999879837036},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4050999879837036},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.37689998745918274},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3709000051021576},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.36730000376701355},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.36059999465942383},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C199776023","wikidata":"https://www.wikidata.org/wiki/Q202875","display_name":"Negotiation","level":2,"score":0.34790000319480896},{"id":"https://openalex.org/C35578498","wikidata":"https://www.wikidata.org/wiki/Q193424","display_name":"Web service","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.3357999920845032},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3319999873638153},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.32499998807907104},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.3142000138759613},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.31209999322891235},{"id":"https://openalex.org/C175133352","wikidata":"https://www.wikidata.org/wiki/Q1254596","display_name":"Software as a service","level":4,"score":0.3057999908924103},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2890999913215637},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2718000113964081},{"id":"https://openalex.org/C152752567","wikidata":"https://www.wikidata.org/wiki/Q116877","display_name":"Code refactoring","level":3,"score":0.26969999074935913},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.26460000872612}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.15777","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15777","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.15777","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15777","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Computer-Using":[0],"Agents":[1],"(CUAs)":[2],"are":[3,61,129,177],"rapidly":[4],"extending":[5],"large":[6,72],"language":[7],"models":[8],"(LLMs)":[9],"beyond":[10],"text-based":[11],"reasoning":[12],"toward":[13],"action":[14],"execution":[15],"in":[16,54,113,166],"more":[17],"complex":[18],"environments,":[19],"such":[20],"as":[21,68],"web":[22,31],"browsers":[23],"and":[24,32,78,88,125,128,140,173],"graphical":[25],"user":[26],"interfaces":[27],"(GUIs).":[28],"However,":[29],"existing":[30],"GUI":[33],"agent":[34],"benchmarks":[35],"often":[36],"rely":[37],"on":[38,100,150],"simplified":[39],"settings,":[40,127],"isolated":[41],"tasks,":[42],"or":[43],"short-horizon":[44],"interactions,":[45],"making":[46],"it":[47],"difficult":[48],"to":[49],"assess":[50],"capabilities":[51],"of":[52,74,161],"agents":[53,148],"realistic":[55,114],"professional":[56,107],"workflows.":[57],"Software-as-a-Service":[58],"(SaaS)":[59],"environments":[60],"a":[62,71,97],"natural":[63],"choice":[64],"for":[65,181],"CUA":[66],"evaluation,":[67],"they":[69],"host":[70],"share":[73],"modern":[75],"digital":[76],"work":[77,115],"naturally":[79],"involve":[80],"dynamic":[81],"system":[82],"states,":[83],"cross-application":[84,170],"coordination,":[85],"domain-specific":[86],"knowledge,":[87],"long-horizon":[89,120],"dependencies.":[90],"To":[91],"this":[92],"end,":[93],"we":[94],"introduce":[95],"SaaS-Bench,":[96,151],"benchmark":[98],"built":[99],"23":[101],"deployable":[102],"SaaS":[103],"systems":[104],"across":[105],"six":[106],"domains,":[108],"containing":[109],"106":[110],"tasks":[111,118,162],"grounded":[112],"scenarios.":[116],"These":[117],"require":[119],"execution,":[121],"cover":[122],"both":[123],"text-only":[124],"multimodal":[126],"evaluated":[130],"with":[131,152],"weighted":[132],"verification":[133],"checkpoints":[134],"that":[135,145],"measure":[136],"strict":[137],"task":[138],"completion":[139],"partial":[141],"progress.":[142],"Experiments":[143],"show":[144],"representative":[146],"LLM-based":[147],"struggle":[149],"even":[153],"the":[154],"strongest":[155],"model":[156],"completing":[157],"fewer":[158],"than":[159],"4%":[160],"end-to-end,":[163],"exposing":[164],"limitations":[165],"planning,":[167],"state":[168],"tracking,":[169],"context":[171],"maintenance,":[172],"error":[174],"recovery.":[175],"Code":[176],"available":[178],"at":[179],"https://github.com/UniPat-AI/SaaS-Bench":[180],"reproduction.":[182]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-19T00:00:00"}
