{"id":"https://openalex.org/W4403333972","doi":"https://doi.org/10.1145/3654777.3676382","title":"LlamaTouch: A Faithful and Scalable Testbed for Mobile UI Task Automation","display_name":"LlamaTouch: A Faithful and Scalable Testbed for Mobile UI Task Automation","publication_year":2024,"publication_date":"2024-10-11","ids":{"openalex":"https://openalex.org/W4403333972","doi":"https://doi.org/10.1145/3654777.3676382"},"language":"en","primary_location":{"id":"doi:10.1145/3654777.3676382","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3654777.3676382","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th Annual ACM Symposium on User Interface Software and Technology","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047883479","display_name":"Li Lyna Zhang","orcid":"https://orcid.org/0000-0003-0779-8310"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Li Zhang","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0000-0003-0779-8310","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042671591","display_name":"Shihe Wang","orcid":"https://orcid.org/0009-0001-6883-2036"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shihe Wang","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0009-0001-6883-2036","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084563214","display_name":"Xianqing Jia","orcid":"https://orcid.org/0000-0001-7309-2936"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianqing Jia","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0000-0001-7309-2936","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111357593","display_name":"Zhihan Zheng","orcid":"https://orcid.org/0009-0004-8896-4222"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhihan Zheng","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0009-0004-8896-4222","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113148266","display_name":"Yunhe Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunhe Yan","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0009-0001-1798-9826","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100590599","display_name":"Longxi Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longxi Gao","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0009-0003-9762-1160","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100628298","display_name":"Yuanchun Li","orcid":"https://orcid.org/0000-0002-1591-2526"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanchun Li","raw_affiliation_strings":["Institute for AI Industry Research (AIR), Tsinghua University, China"],"raw_orcid":"https://orcid.org/0000-0002-1591-2526","affiliations":[{"raw_affiliation_string":"Institute for AI Industry Research (AIR), Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089690212","display_name":"Mengwei Xu","orcid":"https://orcid.org/0000-0001-6271-6993"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengwei Xu","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0000-0001-6271-6993","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5047883479"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":1.3956,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.8295845,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.9842000007629395,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/testbed","display_name":"Testbed","score":0.8913663625717163},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7612453103065491},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6469572186470032},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6334806680679321},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.4887795150279999},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.43204209208488464},{"id":"https://openalex.org/keywords/mobile-computing","display_name":"Mobile computing","score":0.41614818572998047},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.37544748187065125},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.20625880360603333},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.19079852104187012},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10357478260993958}],"concepts":[{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.8913663625717163},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7612453103065491},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6469572186470032},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6334806680679321},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.4887795150279999},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.43204209208488464},{"id":"https://openalex.org/C144543869","wikidata":"https://www.wikidata.org/wiki/Q2738570","display_name":"Mobile computing","level":2,"score":0.41614818572998047},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.37544748187065125},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.20625880360603333},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.19079852104187012},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10357478260993958},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3654777.3676382","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3654777.3676382","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th Annual ACM Symposium on User Interface Software and Technology","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2765874585","https://openalex.org/W3098062821","https://openalex.org/W3160022552","https://openalex.org/W4220747226","https://openalex.org/W4312516512","https://openalex.org/W4366549767","https://openalex.org/W4366549916","https://openalex.org/W4387801102"],"related_works":["https://openalex.org/W2883256816","https://openalex.org/W2171408034","https://openalex.org/W3003320923","https://openalex.org/W2106140982","https://openalex.org/W2152313554","https://openalex.org/W2064303750","https://openalex.org/W4285042611","https://openalex.org/W1509300825","https://openalex.org/W3092582874","https://openalex.org/W2338718585"],"abstract_inverted_index":{"The":[0],"emergent":[1],"large":[2],"language/multimodal":[3],"models":[4],"facilitate":[5],"the":[6,65,184],"evolution":[7],"of":[8,140,203,224],"mobile":[9,13,53,103,109,175,195,207,226],"agents,":[10],"especially":[11],"in":[12,162,183,205],"UI":[14,54,71,116,134,168],"task":[15,55,60,66,99,112,220],"automation.":[16],"However,":[17],"existing":[18],"evaluation":[19,77,204],"approaches,":[20],"which":[21],"rely":[22],"on":[23],"human":[24,214],"validation":[25],"or":[26],"established":[27],"datasets":[28,186],"to":[29,105,127,157,191],"compare":[30],"agent-predicted":[31],"actions":[32],"with":[33,107,136,166],"predefined":[34],"action":[35],"sequences,":[36],"are":[37,231],"unscalable":[38],"and":[39,57,123,130,154,177,187,209,222,229],"unfaithful.":[40],"To":[41],"overcome":[42],"these":[43],"limitations,":[44],"this":[45],"paper":[46],"presents":[47],"LlamaTouch,":[48],"a":[49,75,137],"testbed":[50],"for":[51,111],"on-device":[52],"execution":[56,67,100],"faithful,":[58],"scalable":[59],"evaluation.":[61],"By":[62],"observing":[63],"that":[64,79,101,119,151],"process":[68],"only":[69,80],"transfers":[70],"states,":[72],"LlamaTouch":[73,92,171,216],"employs":[74],"novel":[76],"approach":[78],"assesses":[81],"whether":[82],"an":[83],"agent":[84],"traverses":[85],"all":[86],"manually":[87],"annotated,":[88],"essential":[89,133],"application/system":[90],"states.":[91],"comprises":[93],"three":[94],"key":[95],"techniques:":[96],"(1)":[97],"On-device":[98],"enables":[102,218],"agents":[104,176],"interact":[106],"realistic":[108],"environments":[110,208],"execution.":[113],"(2)":[114],"Fine-grained":[115],"component":[117],"annotation":[118,142,221],"merges":[120],"pixel-level":[121],"screenshots":[122],"textual":[124],"screen":[125],"hierarchies":[126],"explicitly":[128],"identify":[129],"precisely":[131],"annotate":[132],"components":[135],"rich":[138],"set":[139],"designed":[141],"primitives.":[143],"(3)":[144],"A":[145],"multi-level":[146],"application":[147],"state":[148],"matching":[149,156],"algorithm":[150],"utilizes":[152],"exact":[153],"fuzzy":[155],"accurately":[158],"detect":[159],"critical":[160],"information":[161],"each":[163],"screen,":[164],"even":[165],"unpredictable":[167],"layout/content":[169],"dynamics.":[170],"currently":[172],"incorporates":[173],"four":[174],"496":[178],"tasks,":[179],"encompassing":[180],"both":[181],"tasks":[182],"widely-used":[185],"our":[188],"self-constructed":[189],"ones":[190],"cover":[192],"more":[193],"diverse":[194],"applications.":[196],"Evaluation":[197],"results":[198],"demonstrate":[199],"LlamaTouch\u2019s":[200],"high":[201],"faithfulness":[202],"real-world":[206],"its":[210],"better":[211],"scalability":[212],"than":[213],"validation.":[215],"also":[217],"easy":[219],"integration":[223],"new":[225],"agents.":[227],"Code":[228],"dataset":[230],"publicly":[232],"available":[233],"at":[234],"https://github.com/LlamaTouch/LlamaTouch.":[235]},"counts_by_year":[{"year":2025,"cited_by_count":6}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
