{"id":"https://openalex.org/W4414197356","doi":"https://doi.org/10.1109/dac63849.2025.11132485","title":"An Algorithm-Hardware Co-design Based on Revised Microscaling Format Quantization for Accelerating Large Language Models","display_name":"An Algorithm-Hardware Co-design Based on Revised Microscaling Format Quantization for Accelerating Large Language Models","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414197356","doi":"https://doi.org/10.1109/dac63849.2025.11132485"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11132485","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132485","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100732976","display_name":"Yanpeng Hao","orcid":"https://orcid.org/0000-0002-7938-2835"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yingbo Hao","raw_affiliation_strings":["South China University of Technology"],"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081375882","display_name":"Huangxu Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164767","display_name":"GCI Science & Technology (China)","ror":"https://ror.org/05tj2eg80","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210164767"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Huangxu Chen","raw_affiliation_strings":["Hong Kong University of Science and Technology (GZ),Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology (GZ),Guangzhou,China","institution_ids":["https://openalex.org/I4210164767","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100458990","display_name":"Yi Zou","orcid":"https://orcid.org/0000-0002-4382-4670"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Zou","raw_affiliation_strings":["South China University of Technology"],"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011692177","display_name":"Yanfeng Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanfeng Yang","raw_affiliation_strings":["South China University of Technology"],"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100732976"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.27908212,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.685699999332428,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.685699999332428,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6779999732971191,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.6345000267028809,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.7556999921798706},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7049999833106995},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.664900004863739},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6496999859809875},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5684999823570251},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.5153999924659729},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.38749998807907104}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7700999975204468},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.7556999921798706},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7049999833106995},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.664900004863739},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6496999859809875},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5684999823570251},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.5153999924659729},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.38749998807907104},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3774999976158142},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.37299999594688416},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35359999537467957},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.33250001072883606},{"id":"https://openalex.org/C65232700","wikidata":"https://www.wikidata.org/wiki/Q5656403","display_name":"Hardware architecture","level":3,"score":0.3312000036239624},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.32179999351501465},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.310699999332428},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.31029999256134033},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.30230000615119934},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2921000123023987},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2709999978542328},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.25369998812675476}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11132485","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132485","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2883920103","https://openalex.org/W2998183051","https://openalex.org/W3004853346","https://openalex.org/W3100985894","https://openalex.org/W3133253223","https://openalex.org/W3177265267","https://openalex.org/W4242577057","https://openalex.org/W4308083739","https://openalex.org/W4366341968","https://openalex.org/W4380874652","https://openalex.org/W4401211807"],"related_works":[],"abstract_inverted_index":{"The":[0],"narrow-bit-width":[1],"data":[2],"format":[3,28,53,125],"is":[4,58,71],"crucial":[5],"for":[6,36,65],"reducing":[7],"the":[8,37,62,78,119,154,168],"computation":[9],"and":[10,61,108,126,144,158,165,180],"storage":[11],"costs":[12],"of":[13,81,156],"modern":[14],"deep":[15],"learning":[16],"applications,":[17],"particularly":[18,76],"in":[19,40,84],"large":[20,89],"language":[21],"models":[22],"(LLMs)":[23],"based":[24],"applications.":[25],"Microscaling":[26],"(MX)":[27],"has":[29],"been":[30],"proven":[31],"as":[32,173],"a":[33,51,88,101,109,128,150,162,176,181],"drop-in":[34],"replacement":[35],"baseline":[38],"FP32":[39],"existing":[41,55,169],"inference":[42,67],"frameworks,":[43],"with":[44,132],"low":[45,69],"user":[46],"friction.":[47],"However,":[48],"deploying":[49],"such":[50,82,172],"new":[52,163],"into":[54],"hardware":[56,142],"systems":[57],"still":[59,72],"challenging,":[60],"dominant":[63],"solution":[64],"LLM":[66],"at":[68],"precision":[70],"low-bit":[73],"quantization.":[74],"This":[75],"limits":[77],"strategic":[79],"applications":[80],"LLMs":[83],"real":[85],"deployment":[86],"on":[87],"scale.":[90],"In":[91],"this":[92],"work,":[93],"we":[94],"propose":[95],"an":[96,139,145],"algorithm-hardware":[97],"co-design":[98],"that":[99],"adopts":[100],"two-level":[102],"$Revised":[103,110],"MX":[104,111],"Format":[105,112],"Quantization$":[106],"(RMFQ)":[107],"Accelerator$":[113],"(RMFA)":[114],"architecture":[115,143],"design.":[116],"RMFQ":[117,152],"proposes":[118],"$revised":[120],"M":[121,123],"X(R":[122],"X)$":[124],"provides":[127,138],"novel":[129],"quantization":[130,160],"framework":[131],"innovative":[133],"group":[134],"direction.":[135],"Also,":[136],"RMFA":[137,166],"RMX":[140,146],"adaptive":[141],"encoding":[147],"scheme.":[148],"As":[149],"result,":[151],"pushes":[153],"limit":[155],"4-bit":[157],"6-bit":[159],"to":[161],"state-of-the-art,":[164],"surpasses":[167],"outlier-aware":[170],"accelerator":[171],"OliVe,":[174],"achieving":[175],"$1.28":[177],"\\times$":[178,183],"speedup":[179],"$1.31":[182],"energy":[184],"reduction.":[185]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
