{"id":"https://openalex.org/W4415254200","doi":"https://doi.org/10.1109/iccv51701.2025.00158","title":"UIPro: Unleashing Superior Interaction Capability for GUI Agents","display_name":"UIPro: Unleashing Superior Interaction Capability for GUI Agents","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4415254200","doi":"https://doi.org/10.1109/iccv51701.2025.00158"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.00158","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00158","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2509.17328","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100708468","display_name":"Hongxin Li","orcid":"https://orcid.org/0000-0003-4951-9770"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongxin Li","raw_affiliation_strings":["University of Chinese Academy of Sciences (UCAS)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences (UCAS)","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072056125","display_name":"Jingran Su","orcid":"https://orcid.org/0000-0002-9873-1770"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jingran Su","raw_affiliation_strings":["PolyU"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PolyU","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101642745","display_name":"Jingfan Chen","orcid":"https://orcid.org/0000-0002-7559-6924"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jingfan Chen","raw_affiliation_strings":["PolyU"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PolyU","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075599550","display_name":"Zheng Ju","orcid":"https://orcid.org/0000-0002-5411-6952"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng Ju","raw_affiliation_strings":["University of Chinese Academy of Sciences (UCAS)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences (UCAS)","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102821179","display_name":"Yuntao Chen","orcid":"https://orcid.org/0000-0002-9555-1897"},"institutions":[{"id":"https://openalex.org/I4210147894","display_name":"Institut de Recherche et d\u2019Innovation","ror":"https://ror.org/04nqadf13","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4210147894"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Yuntao Chen","raw_affiliation_strings":["Hong Kong Institute of Science &#x0026; Innovation,CASIA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hong Kong Institute of Science &#x0026; Innovation,CASIA","institution_ids":["https://openalex.org/I4210147894"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043699117","display_name":"Qing Yun Li","orcid":"https://orcid.org/0000-0001-8128-6319"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qing Li","raw_affiliation_strings":["PolyU"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PolyU","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5120025653","display_name":"Zhaoxiang Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaoxiang Zhang","raw_affiliation_strings":["University of Chinese Academy of Sciences (UCAS)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences (UCAS)","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100708468"],"corresponding_institution_ids":["https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26172722,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1613","last_page":"1623"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.9577000141143799,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.9203000068664551,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6399999856948853},{"id":"https://openalex.org/keywords/graphical-user-interface","display_name":"Graphical user interface","score":0.635699987411499},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6060000061988831},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5257999897003174},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4171999990940094},{"id":"https://openalex.org/keywords/comprehension","display_name":"Comprehension","score":0.3165999948978424},{"id":"https://openalex.org/keywords/graphical-model","display_name":"Graphical model","score":0.30160000920295715}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7361999750137329},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6399999856948853},{"id":"https://openalex.org/C37789001","wikidata":"https://www.wikidata.org/wiki/Q782543","display_name":"Graphical user interface","level":2,"score":0.635699987411499},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6060000061988831},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5888000130653381},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5257999897003174},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4171999990940094},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.32120001316070557},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3046000003814697},{"id":"https://openalex.org/C155846161","wikidata":"https://www.wikidata.org/wiki/Q1143367","display_name":"Graphical model","level":2,"score":0.30160000920295715},{"id":"https://openalex.org/C66153210","wikidata":"https://www.wikidata.org/wiki/Q5597182","display_name":"Graphical user interface testing","level":4,"score":0.29789999127388},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.2937000095844269},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.27950000762939453},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.26600000262260437},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.00158","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00158","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2509.17328","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.17328","pdf_url":"https://arxiv.org/pdf/2509.17328","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2509.17328","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.17328","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2509.17328","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.17328","pdf_url":"https://arxiv.org/pdf/2509.17328","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1817111062","display_name":null,"funder_award_id":"2022ZD0160102","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G3271378985","display_name":null,"funder_award_id":"U21B2042,62320106010","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Building":[0],"autonomous":[1],"agents":[2,27,47],"that":[3],"perceive":[4],"and":[5,38,64,93,148],"operate":[6],"graphical":[7],"user":[8],"interfaces":[9],"(GUIs)":[10],"like":[11],"humans":[12],"has":[13],"long":[14],"been":[15],"a":[16,84,100,107,121,137,150],"vision":[17],"in":[18],"the":[19,29,50,59,69,155,179],"field":[20],"of":[21,54,71,159,181],"artificial":[22],"intelligence.":[23],"Central":[24],"to":[25,116,129,141,153],"these":[26,78],"is":[28,127],"capability":[30],"for":[31],"GUI":[32,36,46,74,87,95,113,123,131,144,172],"interaction,":[33],"which":[34,126],"involves":[35],"understanding":[37,114],"planning":[39],"capabilities.":[40],"Existing":[41],"methods":[42],"have":[43],"tried":[44],"developing":[45],"based":[48],"on":[49,175],"multi-modal":[51],"comprehension":[52],"ability":[53,158],"vision-language":[55],"models":[56],"(VLMs).":[57],"However,":[58],"limited":[60],"scenario,":[61],"insufficient":[62],"size,":[63],"heterogeneous":[65,143],"action":[66,102,139,156],"spaces":[67],"hinder":[68],"progress":[70],"building":[72],"generalist":[73,86],"agents.":[75],"To":[76],"resolve":[77],"issues,":[79],"this":[80],"paper":[81],"proposes":[82],"\\textbf{UIPro},":[83],"novel":[85],"agent":[88,132,145],"trained":[89],"with":[90,99],"extensive":[91],"multi-platform":[92],"multi-task":[94],"interaction":[96],"data,":[97],"coupled":[98],"unified":[101,138],"space.":[103],"We":[104],"first":[105],"curate":[106],"comprehensive":[108],"dataset":[109,152],"encompassing":[110],"20.6":[111],"million":[112],"tasks":[115],"pre-train":[117],"UIPro,":[118],"granting":[119],"it":[120],"strong":[122],"grounding":[124],"capability,":[125],"key":[128],"downstream":[130],"tasks.":[133],"Subsequently,":[134],"we":[135],"establish":[136],"space":[140],"harmonize":[142],"task":[146,173],"datasets":[147],"produce":[149],"merged":[151],"foster":[154],"prediction":[157],"UIPro":[160],"via":[161],"continued":[162],"fine-tuning.":[163],"Experimental":[164],"results":[165],"demonstrate":[166],"UIPro's":[167],"superior":[168],"performance":[169],"across":[170],"multiple":[171],"benchmarks":[174],"various":[176],"platforms,":[177],"highlighting":[178],"effectiveness":[180],"our":[182],"approach.":[183]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-16T00:00:00"}
