{"id":"https://openalex.org/W7138102159","doi":"https://doi.org/10.1609/aaai.v40i32.39974","title":"ProBench: Benchmarking GUI Agents with Accurate Process Information","display_name":"ProBench: Benchmarking GUI Agents with Accurate Process Information","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138102159","doi":"https://doi.org/10.1609/aaai.v40i32.39974"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i32.39974","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i32.39974","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39974/43935","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39974/43935","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129681548","display_name":"Leyang Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Leyang Yang","raw_affiliation_strings":["Zhejiang Key Laboratory of Accessible Perception and Intelligent Systems, Zhejiang University\nCollege of Computer Science and Technology, Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang Key Laboratory of Accessible Perception and Intelligent Systems, Zhejiang University\nCollege of Computer Science and Technology, Zhejiang University","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100389386","display_name":"Ziwei Wang","orcid":"https://orcid.org/0000-0003-4479-3738"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziwei Wang","raw_affiliation_strings":["Zhejiang Key Laboratory of Accessible Perception and Intelligent Systems, Zhejiang University\nCollege of Computer Science and Technology, Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang Key Laboratory of Accessible Perception and Intelligent Systems, Zhejiang University\nCollege of Computer Science and Technology, Zhejiang University","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103533293","display_name":"Xiaoxuan Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoxuan Tang","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021120517","display_name":"Sheng Zhou","orcid":"https://orcid.org/0000-0003-0651-0071"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sheng Zhou","raw_affiliation_strings":["Zhejiang Key Laboratory of Accessible Perception and Intelligent Systems, Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang Key Laboratory of Accessible Perception and Intelligent Systems, Zhejiang University","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103155183","display_name":"Dajun Chen","orcid":"https://orcid.org/0009-0000-9532-7636"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dajun Chen","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129715428","display_name":"Wei Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Jiang","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129641179","display_name":"Yong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yong Li","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129681548"],"corresponding_institution_ids":["https://openalex.org/I55712492"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.44444444,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"32","first_page":"27547","last_page":"27555"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12607","display_name":"Personal Information Management and User Behavior","score":0.19449999928474426,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12607","display_name":"Personal Information Management and User Behavior","score":0.19449999928474426,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10470","display_name":"Usability and User Interface Design","score":0.06710000336170197,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10525","display_name":"Human-Automation Interaction and Safety","score":0.05420000106096268,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7710999846458435},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7163000106811523},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.7098000049591064},{"id":"https://openalex.org/keywords/graphical-user-interface","display_name":"Graphical user interface","score":0.6829000115394592},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5608000159263611},{"id":"https://openalex.org/keywords/user-interface","display_name":"User interface","score":0.4643000066280365}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7896999716758728},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7710999846458435},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7163000106811523},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.7098000049591064},{"id":"https://openalex.org/C37789001","wikidata":"https://www.wikidata.org/wiki/Q782543","display_name":"Graphical user interface","level":2,"score":0.6829000115394592},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5608000159263611},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.541700005531311},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.4643000066280365},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.4587000012397766},{"id":"https://openalex.org/C66153210","wikidata":"https://www.wikidata.org/wiki/Q5597182","display_name":"Graphical user interface testing","level":4,"score":0.4528999924659729},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.4345000088214874},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3612000048160553},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3172999918460846},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.3125999867916107},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.30410000681877136},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27790001034736633}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i32.39974","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i32.39974","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39974/43935","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i32.39974","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i32.39974","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39974/43935","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138102159.pdf","grobid_xml":"https://content.openalex.org/works/W7138102159.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,16,30,37,54,75,124],"deep":[2],"integration":[3],"of":[4,40,63,156,161],"artificial":[5],"intelligence":[6],"and":[7,22,92,137,184],"interactive":[8],"technology,":[9],"Graphical":[10],"User":[11],"Interface":[12],"(GUI)":[13],"Agent,":[14],"as":[15],"carrier":[17],"connecting":[18],"goal-oriented":[19],"natural":[20],"language":[21],"real-world":[23,169],"devices,":[24],"has":[25,83],"received":[26],"widespread":[27],"attention":[28],"from":[29],"community.":[31],"Contemporary":[32],"benchmarks":[33],"aim":[34],"to":[35,85,133],"evaluate":[36],"comprehensive":[38,111],"capabilities":[39],"GUI":[41,44,59,118,163,170],"agents":[42,164],"in":[43,74],"operation":[45,60],"tasks,":[46],"generally":[47],"determining":[48],"task":[49],"completion":[50],"solely":[51],"by":[52],"inspecting":[53],"final":[55,76],"screen":[56],"state.":[57],"However,":[58],"tasks":[61,119],"consist":[62],"multiple":[64],"chained":[65],"steps":[66,88],"while":[67],"not":[68],"all":[69],"critical":[70],"information":[71,97],"is":[72],"presented":[73],"few":[77,81],"pages.":[78],"Although":[79],"a":[80,110,139],"research":[82],"begun":[84],"incorporate":[86],"intermediate":[87],"into":[89],"evaluation,":[90,128],"accurately":[91],"automatically":[93,148],"capturing":[94],"this":[95,105],"process":[96,151],"still":[98],"remains":[99],"an":[100],"open":[101],"challenge.":[102],"To":[103],"address":[104],"weakness,":[106],"we":[107,129],"introduce":[108],"ProBench,":[109],"mobile":[112],"benchmark":[113],"with":[114],"over":[115],"200":[116],"challenging":[117],"covering":[120],"widely-used":[121],"scenarios.":[122,171],"Remaining":[123],"traditional":[125],"State-related":[126],"Task":[127,136],"extend":[130],"our":[131],"dataset":[132],"include":[134],"Process-related":[135],"design":[138],"specialized":[140],"evaluation":[141,160],"method.":[142],"A":[143,188],"newly":[144],"introduced":[145],"Process":[146],"Provider":[147],"supplies":[149],"accurate":[150],"information,":[152],"enabling":[153],"presice":[154],"assessment":[155],"agent's":[157],"performance.":[158],"Our":[159],"advanced":[162],"reveals":[165],"significant":[166],"limitations":[167],"for":[168,200],"These":[172],"shortcomings":[173],"are":[174],"prevalent":[175],"across":[176],"diverse":[177],"models,":[178],"including":[179],"both":[180],"large-scale":[181],"generalist":[182],"models":[183],"smaller,":[185],"GUI-specific":[186],"models.":[187],"detailed":[189],"error":[190],"analysis":[191],"further":[192],"exposes":[193],"several":[194],"universal":[195],"problems,":[196],"outlining":[197],"concrete":[198],"directions":[199],"future":[201],"improvements.":[202]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
