{"id":"https://openalex.org/W7137274424","doi":"https://doi.org/10.48550/arxiv.2603.13180","title":"MXNorm: Reusing MXFP block scales for efficient tensor normalisation","display_name":"MXNorm: Reusing MXFP block scales for efficient tensor normalisation","publication_year":2026,"publication_date":"2026-03-13","ids":{"openalex":"https://openalex.org/W7137274424","doi":"https://doi.org/10.48550/arxiv.2603.13180"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13180","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13180","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13180","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129458206","display_name":"Callum McLean","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"McLean, Callum","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129531846","display_name":"Luke Y. Prince","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prince, Luke Y.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129532577","display_name":"Alexandre Payot","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Payot, Alexandre","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129525736","display_name":"Paul Balan\u00e7a","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Balan\u00e7a, Paul","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5015625929","display_name":"Carlo Luschi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luschi, Carlo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5129458206"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11693","display_name":"Cryptography and Residue Arithmetic","score":0.17990000545978546,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11693","display_name":"Cryptography and Residue Arithmetic","score":0.17990000545978546,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.15189999341964722,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.1264999955892563,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8652999997138977},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6399999856948853},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6281999945640564},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.6128000020980835},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5514000058174133},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.527899980545044},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.5157999992370605},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.5091000199317932}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8652999997138977},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6399999856948853},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6281999945640564},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.6128000020980835},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.6043000221252441},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5712000131607056},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5514000058174133},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.527899980545044},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.5157999992370605},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.5091000199317932},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.4724000096321106},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4717999994754791},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.43209999799728394},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.3714999854564667},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.35830000042915344},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.33730000257492065},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3325999975204468},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.33160001039505005},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.28859999775886536},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2721000015735626},{"id":"https://openalex.org/C41431624","wikidata":"https://www.wikidata.org/wiki/Q1053357","display_name":"Block size","level":3,"score":0.2685000002384186},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2653000056743622},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.25769999623298645}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13180","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13180","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13180","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13180","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Matrix":[0],"multiplication":[1,32],"performance":[2,33,39],"has":[3,15],"long":[4],"been":[5],"the":[6,17,66,70,77,86],"major":[7],"bottleneck":[8],"to":[9,117,136,143],"scaling":[10],"deep":[11],"learning":[12],"workloads,":[13],"which":[14,45],"stimulated":[16],"design":[18],"of":[19,76,88,100,104,113,134],"new":[20],"accelerators":[21],"that":[22,64],"use":[23],"increasingly":[24],"low-precision":[25],"number":[26],"formats.":[27],"However,":[28],"improvements":[29,37],"in":[30,38,50,85,147,153,159],"matrix":[31],"have":[34],"far":[35],"outstripped":[36],"on":[40,98],"reductions":[41],"and":[42,80,107,155],"elementwise":[43],"computations,":[44],"are":[46],"still":[47],"being":[48],"performed":[49],"higher":[51],"precision.":[52],"In":[53],"this":[54],"work,":[55],"we":[56],"propose":[57],"MXNorm,":[58],"a":[59,82,118,144,156],"drop-in":[60],"replacement":[61],"for":[62,91,138],"RMSNorm":[63,121],"estimates":[65],"RMS":[67],"using":[68,120,131],"only":[69,132],"block":[71],"scales":[72],"calculated":[73],"as":[74],"part":[75],"MXFP8":[78,123,154],"cast":[79],"enables":[81],"32x":[83],"decrease":[84],"size":[87],"reduction":[89],"needed":[90],"normalization.":[92],"We":[93,125],"validate":[94],"our":[95],"approximation":[96],"method":[97],"pre-training":[99],"Llama":[101,148],"3":[102,149],"models":[103],"125M,":[105],"1B":[106],"8B":[108,150],"parameters,":[109],"finding":[110],"minimal":[111],"loss":[112],"training":[114],"accuracy":[115],"compared":[116],"baseline":[119],"with":[122],"matmuls.":[124],"also":[126],"show":[127],"practical":[128],"kernel":[129],"speedups":[130],"torch.compile":[133],"up":[135],"2.4x":[137],"MXNorm":[139],"over":[140],"RMSNorm,":[141],"corresponding":[142],"1.3%":[145],"speedup":[146,158],"transformer":[151],"layers":[152],"2.6%":[157],"NVFP4.":[160]},"counts_by_year":[],"updated_date":"2026-03-17T07:05:13.627479","created_date":"2026-03-17T00:00:00"}
