{"id":"https://openalex.org/W7152706713","doi":"https://doi.org/10.1145/3774904.3792797","title":"NEZHA: A Zero-sacrifice and Hyperspeed Decoding Architecture for Generative Recommendations","display_name":"NEZHA: A Zero-sacrifice and Hyperspeed Decoding Architecture for Generative Recommendations","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7152706713","doi":"https://doi.org/10.1145/3774904.3792797"},"language":null,"primary_location":{"id":"doi:10.1145/3774904.3792797","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3774904.3792797","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2026","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3774904.3792797","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052254280","display_name":"Yejing Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Yejing Wang","raw_affiliation_strings":["City University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0003-2852-9910","affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Shengyu Zhou","orcid":"https://orcid.org/0000-0001-8716-7716"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengyu Zhou","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-8716-7716","affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133268964","display_name":"Jinyu Lu","orcid":"https://orcid.org/0009-0004-5968-2656"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinyu Lu","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-5968-2656","affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ziwei Liu","orcid":"https://orcid.org/0000-0002-7172-2874"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ziwei Liu","raw_affiliation_strings":["City University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-7172-2874","affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075851850","display_name":"Langming Liu","orcid":"https://orcid.org/0000-0003-1995-3381"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Langming Liu","raw_affiliation_strings":["Alibaba Group, Hang Zhou, China"],"raw_orcid":"https://orcid.org/0000-0003-1995-3381","affiliations":[{"raw_affiliation_string":"Alibaba Group, Hang Zhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021037797","display_name":"Maolin Wang","orcid":"https://orcid.org/0000-0002-0073-0172"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Maolin Wang","raw_affiliation_strings":["City University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-0073-0172","affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129699738","display_name":"Wenlin Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Wenlin Zhang","raw_affiliation_strings":["City University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0003-1809-8264","affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133305019","display_name":"Feng Li","orcid":"https://orcid.org/0009-0001-0770-2107"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Li","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-0770-2107","affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113392255","display_name":"Wenbo Su","orcid":"https://orcid.org/0009-0009-3800-7543"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbo Su","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-3800-7543","affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037002673","display_name":"Pengjie Wang","orcid":"https://orcid.org/0009-0006-4285-5033"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengjie Wang","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-4285-5033","affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100739665","display_name":"Jian Xu","orcid":"https://orcid.org/0000-0003-3111-1005"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Xu","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3111-1005","affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xiangyu Zhao","orcid":"https://orcid.org/0000-0003-2926-4416"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xiangyu Zhao","raw_affiliation_strings":["City University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0003-2926-4416","affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5052254280"],"corresponding_institution_ids":["https://openalex.org/I168719708"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.8348536,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"8073","last_page":"8082"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.7057999968528748,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.7057999968528748,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.03099999949336052,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.024800000712275505,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5098999738693237},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4650999903678894},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.3483000099658966},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.3292999863624573},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.30889999866485596},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.29019999504089355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6682000160217285},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5098999738693237},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4706999957561493},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4650999903678894},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3483000099658966},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3292999863624573},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32519999146461487},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30889999866485596},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.2833000123500824},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2831999957561493},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.25609999895095825},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3774904.3792797","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3774904.3792797","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2026","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3774904.3792797","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3774904.3792797","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2026","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11","score":0.4863872528076172}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2587741066","https://openalex.org/W4312974539","https://openalex.org/W4376312036","https://openalex.org/W4396758712","https://openalex.org/W4400909953","https://openalex.org/W4401042997","https://openalex.org/W4403220611","https://openalex.org/W4403221739","https://openalex.org/W4403395420","https://openalex.org/W4403577940","https://openalex.org/W4403582496","https://openalex.org/W4407375814","https://openalex.org/W4409158513","https://openalex.org/W4412394824","https://openalex.org/W4412394897","https://openalex.org/W4412825745","https://openalex.org/W4412876963","https://openalex.org/W4412886974","https://openalex.org/W4414034826","https://openalex.org/W4416017185","https://openalex.org/W4416018017"],"related_works":[],"abstract_inverted_index":{"Generative":[0],"Recommendation":[1],"(GR),":[2],"powered":[3],"by":[4,25],"Large":[5],"Language":[6],"Models":[7],"(LLMs),":[8],"represents":[9],"a":[10,86,103,120,151],"promising":[11],"new":[12,58],"paradigm":[13],"for":[14,32,93],"industrial":[15],"recommender":[16],"systems.":[17],"However,":[18],"their":[19,38],"practical":[20],"application":[21],"is":[22,197],"severely":[23],"hindered":[24],"high":[26],"inference":[27],"latency,":[28],"making":[29],"them":[30],"infeasible":[31],"high-throughput,":[33],"real-time":[34],"services":[35],"and":[36,66,73,166,186],"limiting":[37],"overall":[39],"business":[40,179],"impact.":[41],"While":[42],"Speculative":[43],"Decoding":[44],"(SD)":[45],"has":[46],"been":[47],"proposed":[48],"to":[49,132,182],"accelerate":[50],"the":[51,110,126,134,156,170],"autoregressive":[52,105],"generation":[53],"process,":[54],"existing":[55],"implementations":[56],"introduce":[57,144],"bottlenecks:":[59],"they":[60],"typically":[61],"require":[62,70],"separate":[63],"draft":[64,106],"models":[65],"model-based":[67],"verifiers,":[68],"which":[69],"additional":[71],"training":[72],"increase":[74],"latency":[75],"overhead.":[76],"In":[77],"this":[78],"paper,":[79],"we":[80],"address":[81],"these":[82],"challenges":[83],"with":[84,119],"NEZHA,":[85],"novel":[87],"architecture":[88],"that":[89],"achieves":[90],"hyperspeed":[91],"decoding":[92],"GR":[94],"systems":[95],"without":[96],"sacrificing":[97],"recommendation":[98],"quality.":[99],"Specifically,":[100],"NEZHA":[101,159],"integrates":[102],"nimble":[104],"head":[107],"directly":[108],"into":[109],"primary":[111],"model,":[112],"enabling":[113],"efficient":[114],"self-drafting.":[115],"This":[116],"design,":[117],"combined":[118],"specialized":[121],"input":[122],"prompt":[123],"structure,":[124],"preserves":[125],"integrity":[127],"of":[128,137,141,158,189,191],"sequence-to-sequence":[129],"generation.":[130],"Furthermore,":[131],"tackle":[133],"critical":[135],"problem":[136],"hallucination\u2014a":[138],"major":[139],"source":[140],"performance":[142],"degradation\u2014we":[143],"an":[145],"efficient,":[146],"model-free":[147],"verifier":[148],"based":[149],"on":[150,163,172],"hash":[152],"set.":[153],"We":[154],"demonstrate":[155],"effectiveness":[157],"through":[160],"extensive":[161],"experiments":[162],"public":[164],"datasets":[165],"have":[167],"successfully":[168],"deployed":[169],"system":[171],"Taobao":[173],"since":[174],"October":[175],"2025,":[176],"achieving":[177],"1.2%":[178],"improvement,":[180],"translating":[181],"billion-level":[183],"advertising":[184],"revenue":[185],"serving":[187],"hundreds":[188],"millions":[190],"daily":[192],"active":[193],"users.":[194],"The":[195],"code":[196],"available":[198],"at":[199],"https://github.com/Applied-Machine-Learning-":[200],"Lab/WWW2026_NEZHA.":[201]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-10T00:00:00"}
