{"id":"https://openalex.org/W4411688651","doi":"https://doi.org/10.1109/arith64983.2025.00011","title":"An Empirical Study of Microscaling Formats for Low-Precision LLM Training","display_name":"An Empirical Study of Microscaling Formats for Low-Precision LLM Training","publication_year":2025,"publication_date":"2025-05-04","ids":{"openalex":"https://openalex.org/W4411688651","doi":"https://doi.org/10.1109/arith64983.2025.00011"},"language":"en","primary_location":{"id":"doi:10.1109/arith64983.2025.00011","is_oa":false,"landing_page_url":"https://doi.org/10.1109/arith64983.2025.00011","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 32nd Symposium on Computer Arithmetic (ARITH)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100879012","display_name":"Hanmei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hanmei Yang","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024307057","display_name":"Summer Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Summer Deng","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118454189","display_name":"Amit Nagpal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Amit Nagpal","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038603364","display_name":"Maxim Naumov","orcid":"https://orcid.org/0000-0002-6102-2903"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maxim Naumov","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mohammad Janani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mohammad Janani","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038870570","display_name":"Tongping Liu","orcid":"https://orcid.org/0000-0002-1968-4081"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tongping Liu","raw_affiliation_strings":["University of Massachusetts Amherst"],"affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085489377","display_name":"Hui Guan","orcid":"https://orcid.org/0000-0001-9128-2231"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hui Guan","raw_affiliation_strings":["University of Massachusetts Amherst"],"affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst","institution_ids":["https://openalex.org/I24603500"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100879012"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8339,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.85128019,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11301","display_name":"Advanced Surface Polishing Techniques","score":0.942799985408783,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11301","display_name":"Advanced Surface Polishing Techniques","score":0.942799985408783,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11583","display_name":"Advanced Measurement and Metrology Techniques","score":0.901199996471405,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7050278186798096},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5182897448539734},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.375042587518692}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7050278186798096},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5182897448539734},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.375042587518692},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/arith64983.2025.00011","is_oa":false,"landing_page_url":"https://doi.org/10.1109/arith64983.2025.00011","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 32nd Symposium on Computer Arithmetic (ARITH)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W3035756815","https://openalex.org/W3211043030","https://openalex.org/W4380874652","https://openalex.org/W6745245109","https://openalex.org/W6783644704"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W230091440","https://openalex.org/W2390279801","https://openalex.org/W2233261550","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2810751659"],"abstract_inverted_index":{"This":[0,97],"paper":[1],"presents":[2],"a":[3],"comprehensive":[4],"evaluation":[5],"of":[6,13,33,68,80,107],"microscaling":[7],"(MX)":[8],"quantization":[9],"in":[10,116],"the":[11,23,31,66,92,103,111],"pre-training":[12],"large":[14],"language":[15],"models":[16,61],"(LLMs),":[17],"investigating":[18],"its":[19],"potential":[20],"to":[21],"enhance":[22,88],"computation":[24],"and":[25,46,52,73,82,105],"memory":[26],"efficiencies.":[27],"We":[28],"systematically":[29],"examine":[30],"effects":[32],"key":[34],"design":[35],"parameters":[36],"-":[37,48],"including":[38],"data":[39],"types,":[40],"rounding":[41],"modes,":[42],"scaling":[43],"strategies,":[44],"granularity,":[45],"organization":[47],"on":[49,59,102],"numerical":[50],"accuracy":[51],"training":[53,70,89],"stability.":[54],"Our":[55],"extensive":[56],"experimental":[57],"study":[58],"Llama3":[60],"reveals":[62],"critical":[63],"insights":[64],"into":[65],"challenges":[67],"4-bit":[69,81],"for":[71,113],"LLMs":[72],"identifies":[74],"optimal":[75],"configurations":[76],"with":[77,94],"mixed":[78],"precisions":[79],"6-bit":[83],"MX":[84,108],"formats":[85],"that":[86],"significantly":[87],"quality,":[90],"bridging":[91],"gap":[93],"higher-precision":[95],"formats.":[96],"research":[98],"provides":[99],"valuable":[100],"guidance":[101],"benefits":[104],"limitations":[106],"quantization,":[109],"laying":[110],"groundwork":[112],"future":[114],"innovations":[115],"low-precision":[117],"LLM":[118],"training.":[119]},"counts_by_year":[{"year":2026,"cited_by_count":3}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
