{"id":"https://openalex.org/W7154105858","doi":"https://doi.org/10.48550/arxiv.2604.09442","title":"UIPress: Bringing Optical Token Compression to UI-to-Code Generation","display_name":"UIPress: Bringing Optical Token Compression to UI-to-Code Generation","publication_year":2026,"publication_date":"2026-04-10","ids":{"openalex":"https://openalex.org/W7154105858","doi":"https://doi.org/10.48550/arxiv.2604.09442"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.09442","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.09442","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.09442","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133548210","display_name":"Dasen Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dai, Dasen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133524427","display_name":"Shuoqi Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Shuoqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133509471","display_name":"Ronghao Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ronghao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133525848","display_name":"Huacan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Huacan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133507683","display_name":"Biao Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Biao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133528352","display_name":"Qizhen Lan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lan, Qizhen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5133548210"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.1451999992132187,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.1451999992132187,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.12610000371932983,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.08389999717473984,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7465999722480774},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5985000133514404},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5924999713897705},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.49709999561309814},{"id":"https://openalex.org/keywords/uncompressed-video","display_name":"Uncompressed video","score":0.49309998750686646},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4408999979496002},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.399399995803833},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.39079999923706055}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8270999789237976},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7465999722480774},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5985000133514404},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5924999713897705},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.49709999561309814},{"id":"https://openalex.org/C162478608","wikidata":"https://www.wikidata.org/wiki/Q4011369","display_name":"Uncompressed video","level":4,"score":0.49309998750686646},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4408999979496002},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.399399995803833},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.39079999923706055},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3889999985694885},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.3880000114440918},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.38609999418258667},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.38019999861717224},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3675999939441681},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3483000099658966},{"id":"https://openalex.org/C94835093","wikidata":"https://www.wikidata.org/wiki/Q3113333","display_name":"Data compression ratio","level":5,"score":0.34599998593330383},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.32109999656677246},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.30570000410079956},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2939999997615814},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.29100000858306885},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.27810001373291016},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.26829999685287476},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.2678000032901764},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2671999931335449},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2639999985694885},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2590000033378601},{"id":"https://openalex.org/C80156102","wikidata":"https://www.wikidata.org/wiki/Q788036","display_name":"Resolver","level":3,"score":0.25780001282691956},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.25360000133514404}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.09442","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.09442","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.09442","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.09442","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"UI-to-Code":[0,82,210],"generation":[1],"requires":[2],"vision-language":[3],"models":[4],"(VLMs)":[5],"to":[6,53,81,114,119,133],"produce":[7],"thousands":[8],"of":[9,11,58,102,123,147,174,197],"tokens":[10,28,118,169],"structured":[12],"HTML/CSS":[13],"from":[14],"a":[15,87,120,153,171],"single":[16],"screenshot,":[17],"making":[18],"visual":[19,117],"token":[20],"efficiency":[21],"critical.":[22],"Existing":[23],"compression":[24,65,90,206],"methods":[25],"either":[26],"select":[27],"at":[29,167],"inference":[30],"time":[31],"using":[32],"task-agnostic":[33],"heuristics,":[34],"or":[35,51],"zero":[36],"out":[37],"low-attention":[38],"features":[39],"without":[40],"actually":[41],"shortening":[42],"the":[43,54,94,99,131,135,138,148,157,177,183,195,202,209],"sequence":[44],"--":[45],"neither":[46],"truly":[47],"reduces":[48],"prefill":[49],"latency":[50],"adapts":[52],"non-uniform":[55],"information":[56],"density":[57],"UI":[59],"screenshots.":[60],"Meanwhile,":[61],"optical":[62],"(encoder-side":[63],"learned)":[64],"has":[66,77],"shown":[67],"strong":[68],"results":[69],"for":[70,208],"document":[71],"OCR,":[72],"yet":[73],"no":[74],"prior":[75],"work":[76],"adapted":[78],"this":[79],"paradigm":[80],"generation.":[83],"We":[84],"propose":[85],"UIPress,":[86],"lightweight":[88],"learned":[89,205],"module":[91],"inserted":[92],"between":[93],"frozen":[95],"ViT":[96],"encoder":[97],"and":[98,111,182],"LLM":[100],"decoder":[101,132],"Qwen3-VL-8B.":[103],"UIPress":[104,166,200],"combines":[105],"depthwise-separable":[106],"convolutions,":[107],"element-guided":[108],"spatial":[109],"reweighting,":[110],"Transformer":[112],"refinement":[113],"compress":[115],"${\\sim}$6{,}700":[116],"fixed":[121],"budget":[122],"256.":[124],"Together":[125],"with":[126],"Low-Rank":[127],"Adaptation":[128],"(LoRA)":[129],"on":[130,156,164],"bridge":[134],"representation":[136],"gap,":[137],"entire":[139],"system":[140],"adds":[141],"only":[142],"${\\sim}$21.7M":[143],"trainable":[144],"parameters":[145],"(0.26\\%":[146],"8B":[149],"base":[150,159],"model).":[151],"Under":[152],"fair":[154],"comparison":[155],"same":[158],"model":[160],"against":[161],"four":[162],"baselines":[163],"Design2Code,":[165],"256":[168],"achieves":[170],"CLIP":[172],"score":[173],"0.8127,":[175],"outperforming":[176],"uncompressed":[178],"baseline":[179],"by":[180,187],"+7.5\\%":[181],"strongest":[184],"inference-time":[185],"method":[186,207],"+4.6\\%,":[188],"while":[189],"delivering":[190],"9.1$\\times$":[191],"time-to-first-token":[192],"speedup.":[193],"To":[194],"best":[196],"our":[198],"knowledge,":[199],"is":[201],"first":[203],"encoder-side":[204],"task.":[211]},"counts_by_year":[],"updated_date":"2026-04-14T06:08:25.285971","created_date":"2026-04-14T00:00:00"}
