{"id":"https://openalex.org/W7160923282","doi":"https://doi.org/10.48550/arxiv.2605.10347","title":"How Mobile World Model Guides GUI Agents?","display_name":"How Mobile World Model Guides GUI Agents?","publication_year":2026,"publication_date":"2026-05-11","ids":{"openalex":"https://openalex.org/W7160923282","doi":"https://doi.org/10.48550/arxiv.2605.10347"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.10347","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.10347","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.10347","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077564485","display_name":"Weikai Xu","orcid":"https://orcid.org/0009-0003-6449-6703"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Weikai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135973092","display_name":"Kun Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Kun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135935296","display_name":"Yunren Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Yunren","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135913915","display_name":"Jiaxing Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jiaxing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135938206","display_name":"Yuhan Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yuhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135977964","display_name":"Yuxuan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yuxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135993536","display_name":"Zhizheng Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Zhizheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135932228","display_name":"Heng Qu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qu, Heng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135941581","display_name":"Pengzhi Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Pengzhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135935806","display_name":"Wei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135924991","display_name":"Jian Luan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luan, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135916515","display_name":"Xiaolin Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Xiaolin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135912741","display_name":"Bo An","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"An, Bo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.29760000109672546,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.29760000109672546,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.2168000042438507,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.044199999421834946,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5389000177383423},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5182999968528748},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4745999872684479},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.40799999237060547},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.38280001282691956},{"id":"https://openalex.org/keywords/mobile-computing","display_name":"Mobile computing","score":0.3691999912261963},{"id":"https://openalex.org/keywords/real-world-data","display_name":"Real world data","score":0.3686999976634979},{"id":"https://openalex.org/keywords/mobile-device","display_name":"Mobile device","score":0.36820000410079956}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7551000118255615},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.6241999864578247},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5389000177383423},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5182999968528748},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4745999872684479},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.40799999237060547},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.38280001282691956},{"id":"https://openalex.org/C144543869","wikidata":"https://www.wikidata.org/wiki/Q2738570","display_name":"Mobile computing","level":2,"score":0.3691999912261963},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.3686999976634979},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.36820000410079956},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.33730000257492065},{"id":"https://openalex.org/C2988145974","wikidata":"https://www.wikidata.org/wiki/Q620615","display_name":"Mobile apps","level":2,"score":0.3345000147819519},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.32330000400543213},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31040000915527344},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.29190000891685486},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2849000096321106},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.260699987411499}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.10347","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.10347","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.10347","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.10347","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,154],"vision-language":[3],"models":[4,34,81,95,190],"have":[5],"enabled":[6],"mobile":[7,32,75,176],"GUI":[8],"agents":[9,62,177],"to":[10],"perceive":[11],"visual":[12],"interfaces":[13],"and":[14,28,57,73,91,102,113,127,158],"execute":[15],"user":[16],"instructions,":[17],"but":[18],"reliable":[19],"prediction":[20],"of":[21,63],"action":[22,180],"consequences":[23],"remains":[24,44],"critical":[25],"for":[26,132,141,174],"long-horizon":[27],"high-risk":[29],"interactions.":[30],"Existing":[31],"world":[33,80,189],"provide":[35,150],"either":[36],"text-based":[37,136],"or":[38,197],"image-based":[39],"future":[40],"states,":[41],"yet":[42],"it":[43],"unclear":[45],"which":[46],"representation":[47],"is":[48,138],"useful,":[49],"whether":[50],"generated":[51],"rollouts":[52],"can":[53,149],"replace":[54],"real":[55],"environments,":[56],"how":[58],"test-time":[59],"guidance":[60],"helps":[61],"different":[64],"strengths.":[65],"To":[66],"answer":[67],"the":[68,155,170],"above":[69],"questions,":[70],"we":[71,115],"filter":[72],"annotate":[74],"world-model":[76],"data,":[77],"then":[78],"train":[79],"across":[82],"four":[83],"modalities:":[84],"delta":[85],"text,":[86,88],"full":[87],"diffusion-based":[89],"images,":[90],"renderable":[92,120],"code.":[93],"These":[94],"achieve":[96],"SoTA":[97],"performance":[98],"on":[99,110],"both":[100],"MobileWorldBench":[101],"Code2WorldBench.":[103],"Furthermore,":[104],"by":[105],"evaluating":[106],"their":[107],"downstream":[108],"utility":[109],"AITZ,":[111],"AndroidControl,":[112],"AndroidWorld,":[114],"obtain":[116],"three":[117],"findings.":[118],"First,":[119],"code":[121],"reconstruction":[122],"achieves":[123],"high":[124],"in-distribution":[125],"fidelity":[126],"provides":[128,184],"effective":[129,193],"multimodal":[130],"supervision":[131,199],"data":[133,166],"construction,":[134],"while":[135],"feedback":[137],"more":[139,192],"robust":[140],"online":[142],"out-of-distribution":[143],"(OOD)":[144],"execution.":[145],"Second,":[146],"world-model-generated":[147],"trajectories":[148],"transferable":[151],"interaction":[152],"experience":[153],"training":[156,198],"process":[157],"improve":[159],"agents'":[160],"end-to-end":[161],"task":[162],"performance,":[163],"although":[164],"these":[165],"do":[167],"not":[168],"preserve":[169],"original":[171],"distribution.":[172],"Last,":[173],"overconfident":[175],"with":[178],"low":[179],"entropy,":[181],"posterior":[182],"self-reflection":[183],"limited":[185],"gains,":[186],"suggesting":[187],"that":[188],"are":[191],"as":[194,201],"prior":[195],"perception":[196],"than":[200],"universal":[202],"post-hoc":[203],"verifiers.":[204]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-13T00:00:00"}
