{"id":"https://openalex.org/W4404569218","doi":"https://doi.org/10.48550/arxiv.2411.09909","title":"AMXFP4: Taming Activation Outliers with Asymmetric Microscaling Floating-Point for 4-bit LLM Inference","display_name":"AMXFP4: Taming Activation Outliers with Asymmetric Microscaling Floating-Point for 4-bit LLM Inference","publication_year":2024,"publication_date":"2024-11-15","ids":{"openalex":"https://openalex.org/W4404569218","doi":"https://doi.org/10.48550/arxiv.2411.09909"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2411.09909","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.09909","pdf_url":"https://arxiv.org/pdf/2411.09909","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2411.09909","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036363860","display_name":"Janghwan Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lee, Janghwan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101918926","display_name":"Jiwoong Park","orcid":"https://orcid.org/0000-0002-0511-1442"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Jiwoong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100757430","display_name":"Jin-Seok Kim","orcid":"https://orcid.org/0000-0001-5247-1603"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Jinseok","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091502365","display_name":"Yongjik Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Yongjik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110546049","display_name":"Jungju Oh","orcid":"https://orcid.org/0009-0009-9451-251X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oh, Jungju","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089478486","display_name":"Jinwook Oh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oh, Jinwook","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5078440061","display_name":"Jungwook Choi","orcid":"https://orcid.org/0000-0002-3075-8694"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Jungwook","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5036363860"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10558","display_name":"Advancements in Semiconductor Devices and Circuit Design","score":0.974399983882904,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10558","display_name":"Advancements in Semiconductor Devices and Circuit Design","score":0.974399983882904,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10472","display_name":"Semiconductor materials and devices","score":0.9448999762535095,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9128999710083008,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6808093786239624},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.6459054946899414},{"id":"https://openalex.org/keywords/bit","display_name":"Bit (key)","score":0.6101404428482056},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4875783324241638},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.451039582490921},{"id":"https://openalex.org/keywords/cut-point","display_name":"Cut-point","score":0.4102649688720703},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3958870768547058},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2726820707321167},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2651047706604004},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.21582475304603577},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.13556072115898132},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.0694308876991272}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6808093786239624},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.6459054946899414},{"id":"https://openalex.org/C117011727","wikidata":"https://www.wikidata.org/wiki/Q1278488","display_name":"Bit (key)","level":2,"score":0.6101404428482056},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4875783324241638},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.451039582490921},{"id":"https://openalex.org/C2781415353","wikidata":"https://www.wikidata.org/wiki/Q5196602","display_name":"Cut-point","level":2,"score":0.4102649688720703},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3958870768547058},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2726820707321167},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2651047706604004},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.21582475304603577},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.13556072115898132},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0694308876991272}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2411.09909","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.09909","pdf_url":"https://arxiv.org/pdf/2411.09909","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2411.09909","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2411.09909","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2411.09909","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.09909","pdf_url":"https://arxiv.org/pdf/2411.09909","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4404569218.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2411923897","https://openalex.org/W4394546135","https://openalex.org/W4285347720","https://openalex.org/W4200259850","https://openalex.org/W2333831899","https://openalex.org/W2484894494","https://openalex.org/W2367385042","https://openalex.org/W4381186982","https://openalex.org/W2040781570","https://openalex.org/W2393365719"],"abstract_inverted_index":{"As":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"grow":[5],"in":[6,52],"parameter":[7],"size":[8],"and":[9,48,87,164],"context":[10],"length,":[11],"computation":[12],"precision":[13],"has":[14],"been":[15],"reduced":[16],"from":[17],"16-bit":[18],"to":[19,21,32],"4-bit":[20,131],"improve":[22],"inference":[23],"efficiency.":[24],"However,":[25,77],"this":[26,39,97],"reduction":[27],"causes":[28],"accuracy":[29],"degradation":[30],"due":[31],"activation":[33,112],"outliers.":[34],"Rotation-based":[35],"INT4":[36],"methods":[37,167],"address":[38,125],"via":[40],"matrix":[41,70],"calibration,":[42],"but":[43],"they":[44],"introduce":[45],"multi-hour":[46],"overheads":[47],"leave":[49],"key":[50],"computations":[51],"full":[53],"precision.":[54],"Microscaling":[55],"(MX)":[56],"floating-point":[57],"(FP)":[58],"formats":[59,92],"offer":[60],"fine-grained":[61],"representation":[62],"with":[63],"a":[64,130],"shared":[65,141],"scale,":[66],"enabling":[67],"fully":[68],"quantized":[69],"multiplications":[71],"through":[72],"direct":[73],"casting":[74],"without":[75,143],"calibration.":[76,145],"existing":[78],"research":[79],"shows":[80],"unsatisfactory":[81],"empirical":[82],"results":[83],"for":[84],"MXFP4":[85,159,178],"inference,":[86],"the":[88,101,105,118],"robustness":[89],"of":[90,104,120],"MX":[91,106],"remains":[93],"largely":[94],"unexplored.":[95],"In":[96],"work,":[98],"we":[99,127],"uncover":[100],"fundamental":[102],"tradeoffs":[103],"format:":[107],"while":[108,154],"it":[109,114],"effectively":[110],"suppresses":[111],"outliers,":[113],"does":[115],"so":[116],"at":[117],"cost":[119,153],"increased":[121],"group-wise":[122],"asymmetry.":[123],"To":[124],"this,":[126],"propose":[128],"AMXFP4,":[129],"asymmetric":[132,140],"FP":[133],"format":[134],"that":[135],"handles":[136],"both":[137],"issues":[138],"using":[139],"scales,":[142],"requiring":[144],"Our":[146],"custom":[147],"MAC":[148],"engine":[149],"adds":[150],"negligible":[151],"hardware":[152],"improving":[155],"accuracy:":[156],"AMXFP4":[157],"outperforms":[158],"by":[160,168],"3%":[161],"on":[162,170],"VQA":[163],"exceeds":[165],"rotation-based":[166],"1.6%":[169],"CSQA.":[171],"It":[172],"also":[173],"surpasses":[174],"recently":[175],"deployed":[176],"commercial":[177],"variants.":[179],"Code:":[180],"https://github.com/aiha-lab/MX-QLLM":[181]},"counts_by_year":[],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
