{"id":"https://openalex.org/W7154611166","doi":"https://doi.org/10.48550/arxiv.2604.14113","title":"UI-Zoomer: Uncertainty-Driven Adaptive Zoom-In for GUI Grounding","display_name":"UI-Zoomer: Uncertainty-Driven Adaptive Zoom-In for GUI Grounding","publication_year":2026,"publication_date":"2026-04-15","ids":{"openalex":"https://openalex.org/W7154611166","doi":"https://doi.org/10.48550/arxiv.2604.14113"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.14113","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14113","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.14113","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133804073","display_name":"Fei Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Fei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133744874","display_name":"Bofan Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Bofan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133789676","display_name":"Zhengxi Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Zhengxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133792699","display_name":"Tongbo Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Tongbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133776250","display_name":"Songqin Nong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nong, Songqin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133793598","display_name":"Tao Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Tao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133800818","display_name":"Wenhao Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Wenhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133783067","display_name":"Weiming Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Weiming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133822010","display_name":"Jun Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Jun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133773868","display_name":"Yueting Zhuang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhuang, Yueting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133745346","display_name":"Yongliang Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Yongliang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.671500027179718,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.671500027179718,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.056699998676776886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10789","display_name":"Interactive and Immersive Displays","score":0.053300000727176666,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.5640000104904175},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5367000102996826},{"id":"https://openalex.org/keywords/cropping","display_name":"Cropping","score":0.49639999866485596},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4645000100135803},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.46070000529289246},{"id":"https://openalex.org/keywords/sizing","display_name":"Sizing","score":0.34450000524520874},{"id":"https://openalex.org/keywords/graphical-user-interface","display_name":"Graphical user interface","score":0.29580000042915344}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6021000146865845},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.5640000104904175},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5367000102996826},{"id":"https://openalex.org/C13558536","wikidata":"https://www.wikidata.org/wiki/Q785116","display_name":"Cropping","level":3,"score":0.49639999866485596},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4645000100135803},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.46070000529289246},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4230000078678131},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39910000562667847},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35109999775886536},{"id":"https://openalex.org/C2777767291","wikidata":"https://www.wikidata.org/wiki/Q1080291","display_name":"Sizing","level":2,"score":0.34450000524520874},{"id":"https://openalex.org/C37789001","wikidata":"https://www.wikidata.org/wiki/Q782543","display_name":"Graphical user interface","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28940001130104065},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C127491075","wikidata":"https://www.wikidata.org/wiki/Q7617825","display_name":"Stochastic modelling","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.25380000472068787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.14113","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14113","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.14113","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14113","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5863063335418701,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"GUI":[0],"grounding,":[1],"which":[2],"localizes":[3],"interface":[4],"elements":[5],"from":[6],"screenshots":[7],"given":[8],"natural":[9],"language":[10],"queries,":[11],"remains":[12],"challenging":[13],"for":[14],"small":[15],"icons":[16],"and":[17,27,67,113,133,152],"dense":[18],"layouts.":[19],"Test-time":[20],"zoom-in":[21,60,70,93],"methods":[22],"improve":[23],"localization":[24,96],"by":[25],"cropping":[26,35],"re-running":[28],"inference":[29],"at":[30],"higher":[31],"resolution,":[32],"but":[33],"apply":[34],"uniformly":[36],"across":[37,141],"all":[38],"instances":[39],"with":[40,86,155],"fixed":[41],"crop":[42,103,120],"sizes,":[43],"ignoring":[44],"whether":[45],"the":[46,65,123],"model":[47,143],"is":[48,97],"actually":[49],"uncertain":[50],"on":[51,130],"each":[52],"case.":[53],"We":[54],"propose":[55],"\\textbf{UI-Zoomer},":[56],"a":[57,72,118],"training-free":[58],"adaptive":[59],"framework":[61],"that":[62],"treats":[63],"both":[64],"trigger":[66,92],"scale":[68],"of":[69,125,147],"as":[71],"prediction":[73,107],"uncertainty":[74],"quantification":[75],"problem.":[76],"A":[77],"confidence-aware":[78],"gate":[79],"fuses":[80],"spatial":[81],"consensus":[82],"among":[83],"stochastic":[84],"candidates":[85],"token-level":[87],"generation":[88],"confidence":[89],"to":[90,149],"selectively":[91],"only":[94],"when":[95],"uncertain.":[98],"When":[99],"triggered,":[100],"an":[101],"uncertainty-driven":[102],"sizing":[104],"module":[105],"decomposes":[106],"variance":[108],"into":[109],"inter-sample":[110],"positional":[111],"spread":[112],"intra-sample":[114],"box":[115],"extent,":[116],"deriving":[117],"per-instance":[119],"radius":[121],"via":[122],"law":[124],"total":[126],"variance.":[127],"Extensive":[128],"experiments":[129],"ScreenSpot-Pro,":[131],"UI-Vision,":[132],"ScreenSpot-v2":[134],"demonstrate":[135],"consistent":[136],"improvements":[137],"over":[138],"strong":[139],"baselines":[140],"multiple":[142],"architectures,":[144],"achieving":[145],"gains":[146],"up":[148],"+13.4\\%,":[150],"+10.3\\%,":[151],"+4.2\\%":[153],"respectively,":[154],"no":[156],"additional":[157],"training":[158],"required.":[159]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-17T00:00:00"}
