{"id":"https://openalex.org/W7134932607","doi":"https://doi.org/10.48550/arxiv.2603.08713","title":"Unveiling the Potential of Quantization with MXFP4: Strategies for Quantization Error Reduction","display_name":"Unveiling the Potential of Quantization with MXFP4: Strategies for Quantization Error Reduction","publication_year":2026,"publication_date":"2026-01-30","ids":{"openalex":"https://openalex.org/W7134932607","doi":"https://doi.org/10.48550/arxiv.2603.08713"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.08713","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08713","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.08713","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059686266","display_name":"Jatin Chhugani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chhugani, Jatin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069792279","display_name":"Geonhwa Jeong","orcid":"https://orcid.org/0000-0001-6659-3927"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jeong, Geonhwa","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Su, Bor-Yiing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Bor-Yiing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128776885","display_name":"Yunjie Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pan, Yunjie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128793371","display_name":"Hanmei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Hanmei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089206756","display_name":"Aayush Ankit","orcid":"https://orcid.org/0000-0003-2827-8306"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ankit, Aayush","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008163714","display_name":"Jiecao Yu","orcid":"https://orcid.org/0000-0003-2085-0312"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Jiecao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024307057","display_name":"Summer Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Summer","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015551771","display_name":"Yunqing Chen","orcid":"https://orcid.org/0000-0002-3504-8472"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yunqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111985072","display_name":"Nadathur Satish","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Satish, Nadathur","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101667271","display_name":"Changkyu Kim","orcid":"https://orcid.org/0000-0002-0283-8371"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Changkyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.37290000915527344,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.37290000915527344,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.15410000085830688,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.061900001019239426,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.8208000063896179},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.699999988079071},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.5932999849319458},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.5925999879837036},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.49470001459121704},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.484499990940094},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4650000035762787},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.4602000117301941}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.8208000063896179},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.699999988079071},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6758999824523926},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.5932999849319458},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5929999947547913},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.5925999879837036},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.49470001459121704},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.484499990940094},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4650000035762787},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.4602000117301941},{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.4440999925136566},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.436599999666214},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.4117000102996826},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.4108000099658966},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3677999973297119},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.3111000061035156},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.30070000886917114},{"id":"https://openalex.org/C2780056265","wikidata":"https://www.wikidata.org/wiki/Q106239881","display_name":"High dynamic range","level":3,"score":0.27300000190734863},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.2612000107765198},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C42747912","wikidata":"https://www.wikidata.org/wiki/Q1048447","display_name":"Multiplicative function","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.08713","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08713","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.08713","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08713","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"intensified":[5],"the":[6,104],"need":[7],"for":[8],"low-precision":[9],"formats":[10],"that":[11,58],"enable":[12],"efficient,":[13],"large-scale":[14],"inference.":[15],"The":[16],"Open":[17],"Compute":[18],"Project":[19],"(OCP)":[20],"Microscaling":[21],"(MX)":[22],"standard":[23,97],"is":[24],"attractive":[25],"due":[26],"to":[27,89,115,136],"its":[28,33],"favorable":[29],"hardware":[30,65],"efficiency,":[31],"but":[32],"4-bit":[34],"variant":[35],"(MXFP4)":[36],"lags":[37],"behind":[38],"NVIDIA's":[39],"NVFP4":[40,111],"in":[41,151],"accuracy,":[42],"limiting":[43],"adoption.":[44],"We":[45],"introduce":[46],"two":[47],"software-only":[48],"techniques,":[49],"Overflow-Aware":[50],"Scaling":[51,56],"(OAS)":[52],"and":[53,96,101,110],"Macro":[54],"Block":[55],"(MBS),":[57],"improve":[59],"MXFP4":[60,109,131],"quantization":[61],"fidelity":[62],"without":[63],"requiring":[64],"changes.":[66],"OAS":[67,100],"reduces":[68],"overall":[69],"errors":[70],"by":[71],"increasing":[72],"effective":[73],"dynamic":[74],"range":[75],"under":[76],"power-of-two":[77],"block":[78],"scaling,":[79],"while":[80,120,141],"MBS":[81,102],"allocates":[82],"higher-precision":[83],"scaling":[84],"at":[85],"a":[86,133],"coarser":[87],"granularity":[88],"better":[90],"preserve":[91],"outliers.":[92],"Across":[93],"multiple":[94],"LLMs":[95],"downstream":[98],"benchmarks,":[99],"reduce":[103],"end-to-end":[105],"accuracy":[106,140],"gap":[107],"between":[108],"from":[112],"about":[113],"10%":[114],"below":[116],"1%":[117],"on":[118,126],"average,":[119],"incurring":[121],"modest":[122],"GEMM":[123],"overhead":[124],"(6.2%":[125],"average).":[127],"These":[128],"results":[129],"re-establish":[130],"as":[132],"practical":[134],"alternative":[135],"NVFP4,":[137],"enabling":[138],"near-NVFP4":[139],"retaining":[142],"MX's":[143],"hardware-efficiency":[144],"advantages":[145],"(e.g.,":[146],"12%":[147],"relative":[148],"area":[149],"savings":[150],"tensor":[152],"cores).":[153]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-12T00:00:00"}
