{"id":"https://openalex.org/W4409671048","doi":"https://doi.org/10.1145/3696410.3714553","title":"WeInfer: Unleashing the Power of WebGPU on LLM Inference in Web Browsers","display_name":"WeInfer: Unleashing the Power of WebGPU on LLM Inference in Web Browsers","publication_year":2025,"publication_date":"2025-04-22","ids":{"openalex":"https://openalex.org/W4409671048","doi":"https://doi.org/10.1145/3696410.3714553"},"language":"en","primary_location":{"id":"doi:10.1145/3696410.3714553","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714553","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714553","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714553","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zhiyang Chen","orcid":"https://orcid.org/0009-0006-8607-8539"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyang Chen","raw_affiliation_strings":["Institute for Artificial Intelligence, Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-8607-8539","affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence, Peking University, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101614814","display_name":"Yun Ma","orcid":"https://orcid.org/0000-0001-7866-4075"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yun Ma","raw_affiliation_strings":["Institute for Artificial Intelligence, Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7866-4075","affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence, Peking University, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100568210","display_name":"Haiyang Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haiyang Shen","raw_affiliation_strings":["Institute for Artificial Intelligence, Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-4599-3198","affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence, Peking University, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5104285228","display_name":"Mugeng Liu","orcid":"https://orcid.org/0009-0002-7625-8721"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mugeng Liu","raw_affiliation_strings":["School of Computer Science, Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-7625-8721","affiliations":[{"raw_affiliation_string":"School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.2763,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.94940681,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"4264","last_page":"4273"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.9624000191688538,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9588000178337097,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.6407151222229004},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5844947099685669},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5740727782249451},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.5516563057899475},{"id":"https://openalex.org/keywords/web-browser","display_name":"Web browser","score":0.5154550671577454},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.34720438718795776},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.11764198541641235}],"concepts":[{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.6407151222229004},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5844947099685669},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5740727782249451},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.5516563057899475},{"id":"https://openalex.org/C2983909278","wikidata":"https://www.wikidata.org/wiki/Q6368","display_name":"Web browser","level":3,"score":0.5154550671577454},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.34720438718795776},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.11764198541641235},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3696410.3714553","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714553","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714553","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3696410.3714553","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714553","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714553","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320324787","display_name":"Peking University","ror":"https://ror.org/02v51f717"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409671048.pdf","grobid_xml":"https://content.openalex.org/works/W4409671048.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W2159079348","https://openalex.org/W2337423450","https://openalex.org/W2560674852","https://openalex.org/W2766260608","https://openalex.org/W2913293643","https://openalex.org/W2943851418","https://openalex.org/W2996874060","https://openalex.org/W3012249773","https://openalex.org/W3037353980","https://openalex.org/W3104849992","https://openalex.org/W3135795301","https://openalex.org/W3163966209","https://openalex.org/W4281651027","https://openalex.org/W4283021210","https://openalex.org/W4321636575","https://openalex.org/W4385469325","https://openalex.org/W4386385424","https://openalex.org/W4387500346","https://openalex.org/W4388874804","https://openalex.org/W4388994228","https://openalex.org/W4392427708","https://openalex.org/W4396723489","https://openalex.org/W4399323895","https://openalex.org/W4400016946","https://openalex.org/W4402667006","https://openalex.org/W6603024836"],"related_works":["https://openalex.org/W4254303646","https://openalex.org/W1540003828","https://openalex.org/W2287572233","https://openalex.org/W2443963150","https://openalex.org/W2489547187","https://openalex.org/W1533121303","https://openalex.org/W3014218496","https://openalex.org/W569963976","https://openalex.org/W569541443","https://openalex.org/W4242097731"],"abstract_inverted_index":{"Web-based":[0,64,104,208],"large":[1],"language":[2],"model":[3,180],"(LLM)":[4],"has":[5,47],"garnered":[6],"significant":[7],"attention":[8],"from":[9,81,149],"both":[10],"academia":[11],"and":[12,25,92,140,155,171,182],"industry":[13],"as":[14],"it":[15],"combines":[16],"the":[17,23,74,83,112,127,134,206],"benefits":[18],"of":[19,27,32,86,114,137,179],"on-device":[20],"computation":[21,154],"with":[22,130,204],"accessibility":[24],"portability":[26],"Web":[28,40],"applications.":[29],"The":[30,185],"advent":[31],"WebGPU,":[33,87],"a":[34,44,176,199],"modern":[35],"browser":[36],"API":[37],"that":[38,62,125,145,188],"enables":[39],"applications":[41],"to":[42,110,159,198],"utilize":[43],"device's":[45],"GPU,":[46],"opened":[48],"up":[49,197],"new":[50],"possibilities":[51],"for":[52],"GPU-accelerated":[53],"LLM":[54,65,105,209],"inference":[55,66,75,106,210],"within":[56],"browsers.":[57],"However,":[58],"our":[59],"experiment":[60],"reveals":[61],"existing":[63],"frameworks":[67],"exhibit":[68],"inefficiencies":[69,78],"in":[70,89,193],"GPU":[71,150],"utilization,":[72],"limiting":[73],"speed.":[76],"These":[77],"primarily":[79],"arise":[80],"underutilizing":[82],"full":[84],"capabilities":[85],"particularly":[88],"resource":[90,131,147],"management":[91,136],"execution":[93],"synchronization.":[94],"To":[95],"address":[96],"these":[97],"limitations,":[98],"we":[99],"present":[100],"WeInfer,":[101],"an":[102,142],"efficient":[103],"framework":[107],"specifically":[108],"designed":[109],"unleash":[111],"power":[113],"WebGPU.":[115],"WeInfer":[116,189],"incorporates":[117],"two":[118],"key":[119],"innovations:":[120],"1)":[121],"buffer":[122],"reuse":[123],"strategies":[124],"reduce":[126],"overhead":[128],"associated":[129],"preparation,":[132],"optimizing":[133],"lifecycle":[135],"WebGPU":[138],"buffers,":[139],"2)":[141],"asynchronous":[143],"pipeline":[144],"decouples":[146],"preparation":[148],"execution,":[151],"enabling":[152],"parallelized":[153],"deferred":[156],"result":[157],"fetching":[158],"improve":[160],"overall":[161],"efficiency.":[162],"We":[163],"conduct":[164],"extensive":[165],"evaluations":[166],"across":[167],"9":[168],"different":[169],"LLMs":[170],"5":[172],"heterogeneous":[173],"devices,":[174],"covering":[175],"broad":[177],"spectrum":[178],"architectures":[181],"hardware":[183],"configurations.":[184],"results":[186],"demonstrate":[187],"delivers":[190],"substantial":[191],"improvements":[192],"decoding":[194],"speed,":[195],"achieving":[196],"3.76\u00d7":[200],"performance":[201],"boost":[202],"compared":[203],"WebLLM,":[205],"state-of-the-art":[207],"framework.":[211]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
