{"id":"https://openalex.org/W7134248038","doi":"https://doi.org/10.1145/3742414.3795096","title":"EvalAgent: Interactive Comparative Evaluation of Computer-Using GUI Agents","display_name":"EvalAgent: Interactive Comparative Evaluation of Computer-Using GUI Agents","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134248038","doi":"https://doi.org/10.1145/3742414.3795096"},"language":null,"primary_location":{"id":"doi:10.1145/3742414.3795096","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3742414.3795096","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the 31st International Conference on Intelligent User Interfaces","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3742414.3795096","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102965060","display_name":"Yukun Yang","orcid":"https://orcid.org/0009-0003-1971-4468"},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yukun Yang","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029995655","display_name":"Simret Araya Gebreegziabher","orcid":"https://orcid.org/0000-0002-1772-6065"},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Simret Araya Gebreegziabher","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123824335","display_name":"Hojun Yoo","orcid":null},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hojun Yoo","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109909965","display_name":"Charles Chiang","orcid":"https://orcid.org/0009-0008-6079-4355"},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Charles Chiang","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029148728","display_name":"Chaoran Chen","orcid":"https://orcid.org/0000-0002-9161-4088"},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chaoran Chen","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093206754","display_name":"Annalisa Szymanski","orcid":null},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Annalisa Szymanski","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127911264","display_name":"Hyo Jin Do","orcid":null},"institutions":[{"id":"https://openalex.org/I4210087032","display_name":"Cambridge Scientific (United States)","ror":"https://ror.org/001s4dh65","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hyo Jin Do","raw_affiliation_strings":["IBM Research, Cambridge, Massachusetts, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Cambridge, Massachusetts, USA","institution_ids":["https://openalex.org/I4210087032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003440919","display_name":"Zahra Ashktorab","orcid":"https://orcid.org/0000-0002-0686-7911"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zahra Ashktorab","raw_affiliation_strings":["Thomas J. Watson Center, IBM Research, Yorktown Heights, New York, USA"],"affiliations":[{"raw_affiliation_string":"Thomas J. Watson Center, IBM Research, Yorktown Heights, New York, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022799238","display_name":"Werner Geyer","orcid":"https://orcid.org/0000-0003-4699-5026"},"institutions":[{"id":"https://openalex.org/I4210087032","display_name":"Cambridge Scientific (United States)","ror":"https://ror.org/001s4dh65","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Werner Geyer","raw_affiliation_strings":["IBM Research, Cambridge, Massachusetts, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Cambridge, Massachusetts, USA","institution_ids":["https://openalex.org/I4210087032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128414130","display_name":"Diego G\u00f3mez-Zar\u00e1","orcid":null},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Diego G\u00f3mez-Zar\u00e1","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007240808","display_name":"Toby Jia-Jun Li","orcid":"https://orcid.org/0000-0001-7902-7625"},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Toby Jia-Jun Li","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA","institution_ids":["https://openalex.org/I107639228"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5102965060"],"corresponding_institution_ids":["https://openalex.org/I107639228"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.67447226,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"171","last_page":"175"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.15680000185966492,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.15680000185966492,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.09109999984502792,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10470","display_name":"Usability and User Interface Design","score":0.061900001019239426,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.3098999857902527},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.30309998989105225},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.26170000433921814},{"id":"https://openalex.org/keywords/user-interface","display_name":"User interface","score":0.25519999861717224},{"id":"https://openalex.org/keywords/graphical-user-interface","display_name":"Graphical user interface","score":0.249099999666214}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.63919997215271},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3458999991416931},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3440999984741211},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.3098999857902527},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C37789001","wikidata":"https://www.wikidata.org/wiki/Q782543","display_name":"Graphical user interface","level":2,"score":0.249099999666214},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.24469999969005585},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.24169999361038208}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3742414.3795096","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3742414.3795096","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the 31st International Conference on Intelligent User Interfaces","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3742414.3795096","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3742414.3795096","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the 31st International Conference on Intelligent User Interfaces","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W4389519254","https://openalex.org/W4396833505","https://openalex.org/W4399121418","https://openalex.org/W4404782219","https://openalex.org/W4408615073","https://openalex.org/W4409735433","https://openalex.org/W4411505886","https://openalex.org/W4412875498","https://openalex.org/W4414564705","https://openalex.org/W4415796160","https://openalex.org/W4416036639","https://openalex.org/W4416037109","https://openalex.org/W7131789684"],"related_works":[],"abstract_inverted_index":{"As":[0],"generalist":[1],"Computer-Using":[2],"Agents":[3],"(CUAs)":[4],"gain":[5],"popularity,":[6],"evaluating":[7],"them":[8],"solely":[9],"based":[10,121],"on":[11,122],"task":[12],"completion":[13],"is":[14],"insufficient":[15],"to":[16,39,52,86,136],"understand":[17],"their":[18,22,27,54],"reasoning":[19,44],"processes,":[20],"assess":[21,53],"robustness,":[23],"or":[24,49],"ensure":[25],"that":[26,113],"actions":[28],"align":[29],"with":[30,56],"user":[31,57,160],"intentions.":[32],"Developers":[33],"and":[34,43,51,59,91,105,139,162,164],"end-users":[35],"lack":[36],"systematic":[37],"tools":[38],"compare":[40],"the":[41,116,153],"behaviors":[42],"processes":[45],"of":[46,75,118],"different":[47],"agents":[48,78],"configurations,":[50],"alignment":[55],"preferences":[58,161],"values.":[60],"To":[61],"address":[62],"this":[63],"gap,":[64],"we":[65],"present":[66],"EvalAgent,":[67],"an":[68,80],"interactive":[69],"system":[70],"designed":[71],"for":[72],"criteria-based":[73],"evaluation":[74],"LLM-powered":[76],"web":[77],"using":[79],"LLM-as-a-Judge":[81,119],"paradigm.":[82],"EvalAgent":[83,129],"enables":[84],"users":[85],"(1)":[87],"configure":[88],"agent":[89,166],"models":[90],"personas":[92],"as":[93,100],"experimental":[94],"conditions;":[95],"(2)":[96],"visualize":[97],"execution":[98,134],"trajectories":[99],"directed":[101],"acyclic":[102],"graphs":[103],"(DAGs);":[104],"(3)":[106],"employ":[107],"Context-Aware":[108],"Judge\u2014a":[109],"multi-scale":[110],"auditing":[111],"mechanism":[112],"dynamically":[114],"adjusts":[115],"granularity":[117],"evaluations":[120],"user-defined":[123],"criteria.":[124],"Beyond":[125],"binary":[126],"pass/fail":[127],"verdicts,":[128],"highlights":[130],"relevant":[131],"segments":[132],"within":[133],"traces":[135],"surface":[137],"how":[138],"why":[140],"decisions":[141],"were":[142],"made.":[143],"This":[144],"workflow":[145],"supports":[146],"fine-grained,":[147],"full-trace":[148],"behavioral":[149],"auditing,":[150],"helping":[151],"bridge":[152],"gap":[154],"between":[155],"high-level":[156],"policy":[157],"constraints,":[158],"nuanced":[159],"values,":[163],"low-level":[165],"execution.":[167]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-03-10T00:00:00"}
