{"id":"https://openalex.org/W4412888064","doi":"https://doi.org/10.18653/v1/2025.findings-acl.815","title":"WebUIBench: A Comprehensive Benchmark for Evaluating Multimodal Large Language Models in WebUI-to-Code","display_name":"WebUIBench: A Comprehensive Benchmark for Evaluating Multimodal Large Language Models in WebUI-to-Code","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412888064","doi":"https://doi.org/10.18653/v1/2025.findings-acl.815"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.815","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.815","pdf_url":"https://aclanthology.org/2025.findings-acl.815.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.815.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069028297","display_name":"Zhiyu Lin","orcid":"https://orcid.org/0000-0001-8045-9556"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhiyu Lin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018734183","display_name":"Zhengda Zhou","orcid":"https://orcid.org/0009-0004-9348-5787"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhengda Zhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079531245","display_name":"Zhiyuan Zhao","orcid":"https://orcid.org/0000-0002-1227-5058"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhiyuan Zhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tianrui Wan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tianrui Wan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100443236","display_name":"Yong Ma","orcid":"https://orcid.org/0000-0002-4139-9711"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yilun Ma","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014526931","display_name":"Junyu Gao","orcid":"https://orcid.org/0000-0002-8105-5497"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junyu Gao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100740147","display_name":"Xuelong Li","orcid":"https://orcid.org/0000-0003-2843-9173"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xuelong Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.1514,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.95277098,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"15780","last_page":"15797"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.954800009727478,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.954800009727478,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9473999738693237,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9210000038146973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7560843229293823},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7258118391036987},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5603165626525879},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.43464595079421997},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4086410701274872},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3439907431602478}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7560843229293823},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7258118391036987},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5603165626525879},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.43464595079421997},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4086410701274872},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3439907431602478},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.815","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.815","pdf_url":"https://aclanthology.org/2025.findings-acl.815.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.815","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.815","pdf_url":"https://aclanthology.org/2025.findings-acl.815.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412888064.pdf","grobid_xml":"https://content.openalex.org/works/W4412888064.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W4246352526","https://openalex.org/W2121910908","https://openalex.org/W915438175","https://openalex.org/W3204019825"],"abstract_inverted_index":{"With":[0],"the":[1,13,29,39,55,83,129,139],"rapid":[2],"advancement":[3],"of":[4,22,34,41,57,68,85,124],"Generative":[5],"AI":[6,18],"technology,":[7],"Multimodal":[8],"Large":[9],"Language":[10],"Models(MLLMs)":[11],"have":[12],"potential":[14],"to":[15,37,64,96],"act":[16],"as":[17],"software":[19,86],"engineers":[20],"capable":[21],"executing":[23],"complex":[24],"web":[25],"application":[26],"development.Considering":[27],"that":[28,135],"model":[30],"requires":[31],"a":[32,46,92],"confluence":[33],"multidimensional":[35],"sub-capabilities":[36],"address":[38],"challenges":[40],"various":[42,133],"development":[43,58,140],"phases,":[44],"constructing":[45],"multi-view":[47],"evaluation":[48,123],"framework":[49],"is":[50],"crucial":[51],"for":[52],"accurately":[53],"guiding":[54],"enhancement":[56],"efficiency.However,":[59],"existing":[60],"benchmarks":[61],"usually":[62],"fail":[63],"provide":[65],"an":[66],"assessment":[67],"subcapabilities":[69],"and":[70,88,109,132],"focus":[71],"solely":[72],"on":[73],"webpage":[74],"generation":[75],"outcomes.In":[76],"this":[77],"work,":[78],"we":[79],"draw":[80],"inspiration":[81],"from":[82,117],"principles":[84],"engineering":[87],"further":[89],"propose":[90],"WebUIBench,":[91],"benchmark":[93],"systematically":[94],"designed":[95],"evaluate":[97],"MLLMs":[98,127],"in":[99],"four":[100],"key":[101],"areas:":[102],"WebUI":[103],"Perception,":[104],"HTML":[105],"Programming,":[106],"WebUI-HTML":[107],"Understanding,":[108],"WebUI-to-Code.WebUIBench":[110],"comprises":[111],"21K":[112],"high-quality":[113],"question-answer":[114],"pairs":[115],"derived":[116],"over":[118],"0.7K":[119],"real-world":[120],"websites.The":[121],"extensive":[122],"29":[125],"mainstream":[126],"uncovers":[128],"skill":[130],"characteristics":[131],"weakness":[134],"models":[136],"encountered":[137],"during":[138],"process.":[141]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-16T09:24:06.705377","created_date":"2025-10-10T00:00:00"}
