{"id":"https://openalex.org/W7135231419","doi":"https://doi.org/10.48550/arxiv.2603.11486","title":"Quantized Inference for OneRec-V2","display_name":"Quantized Inference for OneRec-V2","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7135231419","doi":"https://doi.org/10.48550/arxiv.2603.11486"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.11486","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11486","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.11486","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128938384","display_name":"Yi Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Su, Yi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129083779","display_name":"Xinchen Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Xinchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129061468","display_name":"Hongtao Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Hongtao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129018356","display_name":"Ziteng Shu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shu, Ziteng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110963616","display_name":"Yunfeng Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Yunfeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129069193","display_name":"Fangyu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Fangyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128951371","display_name":"Jiaqiang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jiaqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128995206","display_name":"Xiao Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Xiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129038067","display_name":"Yiwu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yiwu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129072851","display_name":"Ruiming Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Ruiming","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5128938384"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.2361000031232834,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.2361000031232834,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.15330000221729279,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.13740000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.8773000240325928},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.645799994468689},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5645999908447266},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5015000104904175},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4293999969959259},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.42410001158714294},{"id":"https://openalex.org/keywords/approximate-inference","display_name":"Approximate inference","score":0.42329999804496765},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.3303999900817871}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.8773000240325928},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7595000267028809},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.645799994468689},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5645999908447266},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5361999869346619},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5015000104904175},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5009999871253967},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4293999969959259},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.42410001158714294},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.42329999804496765},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3303999900817871},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32120001316070557},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.29899999499320984},{"id":"https://openalex.org/C18653775","wikidata":"https://www.wikidata.org/wiki/Q1333358","display_name":"Joint probability distribution","level":2,"score":0.29809999465942383},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.2922999858856201},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.29190000891685486},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C196921405","wikidata":"https://www.wikidata.org/wiki/Q786431","display_name":"Online algorithm","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2574000060558319}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.11486","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11486","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.11486","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11486","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.648897647857666,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Quantized":[0],"inference":[1,93,136,168,180,196],"has":[2],"demonstrated":[3],"substantial":[4],"system-level":[5,220],"benefits":[6],"in":[7,26,34,48,94,178,186,200,224],"large":[8,123,215],"language":[9,124,216],"models":[10,54,125],"while":[11],"preserving":[12],"model":[13],"quality.":[14],"In":[15,70,87],"contrast,":[16],"reliably":[17],"applying":[18],"low-precision":[19,85,92,149],"quantization":[20,160],"to":[21,44,67,120,232],"recommender":[22,53,208],"systems":[23,209],"remains":[24],"challenging":[25],"industrial":[27],"settings.":[28],"This":[29],"difficulty":[30],"arises":[31],"from":[32,76],"differences":[33],"training":[35,159],"paradigms,":[36],"architectural":[37],"patterns,":[38],"and":[39,50,58,61,109,118,162,182,219],"computational":[40],"characteristics,":[41],"which":[42],"lead":[43],"distinct":[45],"numerical":[46],"behaviors":[47],"weights":[49,60],"activations.":[51],"Traditional":[52],"often":[55],"exhibit":[56],"high-magnitude":[57],"high-variance":[59],"activations,":[62],"making":[63],"them":[64],"more":[65,116,134,144],"sensitive":[66],"quantization-induced":[68],"perturbations.":[69],"addition,":[71],"recommendation":[72,128,234],"workloads":[73],"frequently":[74],"suffer":[75],"limited":[77],"hardware":[78,141],"utilization,":[79,142],"limiting":[80],"the":[81,95,107,212,225],"practical":[82],"gains":[83,147],"of":[84,97,112,122,214],"computation.":[86,150],"this":[88,152],"work,":[89],"we":[90,104,154],"revisit":[91],"context":[96],"generative":[98],"recommendation.":[99],"Through":[100],"empirical":[101],"distribution":[102],"analysis,":[103],"show":[105],"that":[106,194,206],"weight":[108],"activation":[110],"statistics":[111],"OneRec-V2":[113,131],"are":[114],"significantly":[115],"controlled":[117],"closer":[119],"those":[121],"than":[126],"traditional":[127],"models.":[129],"Moreover,":[130],"exhibits":[132],"a":[133,156,175,183],"compute-intensive":[135],"pattern":[137],"with":[138,148],"substantially":[139],"higher":[140],"enabling":[143],"end-to-end":[145,179],"throughput":[146],"Leveraging":[151],"property,":[153],"develop":[155],"FP8":[157,195],"post":[158],"framework":[161],"integrate":[163],"it":[164],"into":[165],"an":[166],"optimized":[167],"infrastructure.":[169],"The":[170],"proposed":[171],"joint":[172],"optimization":[173,221],"achieves":[174],"49\\%":[176],"reduction":[177],"latency":[181],"92\\%":[184],"increase":[185],"throughput.":[187],"Extensive":[188],"online":[189],"A/B":[190],"testing":[191],"further":[192],"confirms":[193],"introduces":[197],"no":[198],"degradation":[199],"core":[201],"metrics.":[202],"These":[203],"results":[204],"suggest":[205],"as":[207],"evolve":[210],"toward":[211],"paradigms":[213],"models,":[217],"algorithm-level":[218],"techniques":[222],"established":[223],"LLM":[226],"domain":[227],"can":[228],"be":[229],"effectively":[230],"adapted":[231],"large-scale":[233],"workloads.":[235]},"counts_by_year":[],"updated_date":"2026-03-14T06:46:50.379900","created_date":"2026-03-14T00:00:00"}
