{"id":"https://openalex.org/W7125824233","doi":"https://doi.org/10.48550/arxiv.2601.17187","title":"High-Rate Quantized Matrix Multiplication: Theory and Practice","display_name":"High-Rate Quantized Matrix Multiplication: Theory and Practice","publication_year":2026,"publication_date":"2026-01-23","ids":{"openalex":"https://openalex.org/W7125824233","doi":"https://doi.org/10.48550/arxiv.2601.17187"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.17187","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.17187","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.17187","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124001823","display_name":"Or Ordentlich","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ordentlich, Or","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124023891","display_name":"Yury Polyanskiy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Polyanskiy, Yury","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5124001823"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.6186000108718872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.6186000108718872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.15839999914169312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.04659999907016754,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.680400013923645},{"id":"https://openalex.org/keywords/multiplicative-function","display_name":"Multiplicative function","score":0.5291000008583069},{"id":"https://openalex.org/keywords/random-matrix","display_name":"Random matrix","score":0.4691999852657318},{"id":"https://openalex.org/keywords/covariance-matrix","display_name":"Covariance matrix","score":0.439300000667572},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.41920000314712524},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.40400001406669617},{"id":"https://openalex.org/keywords/rate\u2013distortion-theory","display_name":"Rate\u2013distortion theory","score":0.3756999969482422},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.3418999910354614}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.680400013923645},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5916000008583069},{"id":"https://openalex.org/C42747912","wikidata":"https://www.wikidata.org/wiki/Q1048447","display_name":"Multiplicative function","level":2,"score":0.5291000008583069},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4796999990940094},{"id":"https://openalex.org/C64812099","wikidata":"https://www.wikidata.org/wiki/Q176604","display_name":"Random matrix","level":3,"score":0.4691999852657318},{"id":"https://openalex.org/C185142706","wikidata":"https://www.wikidata.org/wiki/Q1134404","display_name":"Covariance matrix","level":2,"score":0.439300000667572},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.41920000314712524},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.40400001406669617},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.391400009393692},{"id":"https://openalex.org/C64185310","wikidata":"https://www.wikidata.org/wiki/Q843483","display_name":"Rate\u2013distortion theory","level":3,"score":0.3756999969482422},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.36399999260902405},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.35850000381469727},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.3418999910354614},{"id":"https://openalex.org/C122123141","wikidata":"https://www.wikidata.org/wiki/Q176623","display_name":"Random variable","level":2,"score":0.31380000710487366},{"id":"https://openalex.org/C178650346","wikidata":"https://www.wikidata.org/wiki/Q201984","display_name":"Covariance","level":2,"score":0.3091999888420105},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.3050999939441681},{"id":"https://openalex.org/C52622258","wikidata":"https://www.wikidata.org/wiki/Q131222","display_name":"Information theory","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.28049999475479126},{"id":"https://openalex.org/C2982755384","wikidata":"https://www.wikidata.org/wiki/Q843483","display_name":"Rate distortion","level":3,"score":0.2720000147819519},{"id":"https://openalex.org/C12426560","wikidata":"https://www.wikidata.org/wiki/Q189569","display_name":"Basis (linear algebra)","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2653000056743622},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.26330000162124634},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.2621999979019165},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2612000107765198}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.17187","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.17187","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.17187","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.17187","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0,166],"work":[1],"investigates":[2],"the":[3,15,43,62,76,120,144,187,214,224,248],"problem":[4,121],"of":[5,18,53,78,99,122,143,189,208,213,226,251],"quantized":[6,35],"matrix":[7,45,51],"multiplication":[8],"(MatMul),":[9],"which":[10,107,160],"has":[11],"become":[12],"crucial":[13],"for":[14,106,229],"efficient":[16],"deployment":[17],"large":[19],"language":[20],"models":[21],"(LLMs).":[22],"We":[23,146],"consider":[24],"two":[25],"settings:":[26],"1)":[27],"Generic":[28],"MatMul,":[29],"where":[30,42],"both":[31],"matrices":[32],"must":[33],"be":[34,151,242],"(weight+activation":[36],"quantization);":[37],"and":[38,69,73,102,202,233],"2)":[39],"weight-only":[40],"quantization,":[41,105],"second":[44],"is":[46,117,180,196,203,221,259],"only":[47,171,185],"known":[48],"through":[49],"covariance":[50],"$\u03a3_X$":[52,190,235],"its":[54,177],"columns.":[55],"For":[56],"each":[57],"setting,":[58],"we":[59,90,108,238],"first":[60],"review":[61],"fundamental":[63,87],"information-theoretic":[64,215],"tradeoff":[65],"between":[66,141],"quantization":[67,81,116,157],"rate":[68,92,140,164],"distortion":[70,216],"(high-rate":[71],"theory),":[72],"then":[74],"analyze":[75],"performance":[77,179,220],"several":[79],"popular":[80],"schemes,":[82,195],"comparing":[83],"them":[84],"to":[85,95,119,153,198,241],"these":[86],"limits.":[88],"Specifically,":[89],"discuss":[91],"loss":[93],"(compared":[94],"information":[96],"theoretic":[97],"optima)":[98],"absmax":[100],"INT":[101,174],"floating-point":[103],"(FP)":[104],"also":[109,260],"derive":[110],"remarkably":[111],"accurate":[112],"heuristic":[113],"approximations.":[114],"Weight-only":[115],"related":[118],"weighted":[123],"mean":[124],"squared":[125],"error":[126],"(WMSE)":[127],"source":[128],"coding,":[129],"whose":[130],"classical":[131],"(reverse)":[132],"waterfilling":[133,149],"solution":[134],"dictates":[135],"how":[136,148],"one":[137],"should":[138],"distribute":[139],"coordinates":[142],"vector.":[145],"show":[147],"can":[150],"used":[152],"improve":[154],"practical":[155],"LLM":[156],"algorithms":[158],"(GPTQ),":[159],"at":[161],"present":[162],"allocate":[163],"equally.":[165],"new":[167],"scheme":[168],"(termed":[169],"``WaterSIC'')":[170],"uses":[172],"scalar":[173],"quantizers,":[175],"but":[176,228],"high-rate":[178,264],"basis":[181],"free":[182],"(it":[183],"depends":[184],"on":[186,247],"determinant":[188],"and,":[191],"thus,":[192],"unlike":[193],"existing":[194],"immune":[197],"applying":[199],"random":[200,231,257],"rotations)":[201],"within":[204,243],"a":[205,230],"multiplicative":[206],"factor":[207],"$\\frac{2\u03c0e}{12}$":[209],"(or":[210],"0.25":[211],"bit/entry)":[212],"limit":[217],"(!).":[218],"GPTQ's":[219],"affected":[222],"by":[223],"choice":[225],"basis,":[227],"rotation":[232,258],"actual":[234],"from":[236],"Llama-3-8B":[237],"find":[239],"GPTQ":[240,255],"0.1":[244],"bit":[245],"(depending":[246],"layer":[249],"type)":[250],"WaterSIC,":[252],"suggesting":[253],"that":[254],"with":[256],"near":[261],"optimal":[262],"(for":[263],"quantization).":[265]},"counts_by_year":[],"updated_date":"2026-01-28T23:18:48.515280","created_date":"2026-01-28T00:00:00"}
