{"id":"https://openalex.org/W4404133534","doi":"https://doi.org/10.1145/3649329.3657323","title":"OPAL: Outlier-Preserved Microscaling Quantization Accelerator for Generative Large Language Models","display_name":"OPAL: Outlier-Preserved Microscaling Quantization Accelerator for Generative Large Language Models","publication_year":2024,"publication_date":"2024-06-23","ids":{"openalex":"https://openalex.org/W4404133534","doi":"https://doi.org/10.1145/3649329.3657323"},"language":"en","primary_location":{"id":"doi:10.1145/3649329.3657323","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3649329.3657323","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 61st ACM/IEEE Design Automation Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jahyun Koo","orcid":"https://orcid.org/0000-0002-8047-2416"},"institutions":[{"id":"https://openalex.org/I193352282","display_name":"Daegu Gyeongbuk Institute of Science and Technology","ror":"https://ror.org/03frjya69","country_code":"KR","type":"education","lineage":["https://openalex.org/I193352282"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jahyun Koo","raw_affiliation_strings":["DGIST, Daegu, Hyeonpung-eup, Dalseong-gun, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-8047-2416","affiliations":[{"raw_affiliation_string":"DGIST, Daegu, Hyeonpung-eup, Dalseong-gun, Republic of Korea","institution_ids":["https://openalex.org/I193352282"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Dahoon Park","orcid":"https://orcid.org/0000-0001-5652-5306"},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Dahoon Park","raw_affiliation_strings":["Korea University, Seoul, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0001-5652-5306","affiliations":[{"raw_affiliation_string":"Korea University, Seoul, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101984266","display_name":"Sangwoo Jung","orcid":"https://orcid.org/0009-0008-1150-9888"},"institutions":[{"id":"https://openalex.org/I193352282","display_name":"Daegu Gyeongbuk Institute of Science and Technology","ror":"https://ror.org/03frjya69","country_code":"KR","type":"education","lineage":["https://openalex.org/I193352282"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sangwoo Jung","raw_affiliation_strings":["DGIST, Daegu, Hyeonpung-eup, Dalseong-gun, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0008-1150-9888","affiliations":[{"raw_affiliation_string":"DGIST, Daegu, Hyeonpung-eup, Dalseong-gun, Republic of Korea","institution_ids":["https://openalex.org/I193352282"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079872127","display_name":"Jaeha Kung","orcid":"https://orcid.org/0000-0001-6151-8602"},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaeha Kung","raw_affiliation_strings":["Korea University, Seoul, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0001-6151-8602","affiliations":[{"raw_affiliation_string":"Korea University, Seoul, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I197347611"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I193352282"],"apc_list":null,"apc_paid":null,"fwci":2.9802,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.92419609,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6883458495140076},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.6286787986755371},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5974794626235962},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5707813501358032},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48233935236930847},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.42991572618484497},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4129120111465454},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.21172448992729187}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6883458495140076},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.6286787986755371},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5974794626235962},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5707813501358032},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48233935236930847},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42991572618484497},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4129120111465454},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.21172448992729187}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3649329.3657323","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3649329.3657323","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 61st ACM/IEEE Design Automation Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2072730350","https://openalex.org/W3211525823","https://openalex.org/W4281485151","https://openalex.org/W4281651027","https://openalex.org/W4285212630","https://openalex.org/W4285601701","https://openalex.org/W4385245566","https://openalex.org/W4386764384","https://openalex.org/W4390873361","https://openalex.org/W4393147284","https://openalex.org/W6739901393"],"related_works":["https://openalex.org/W2380075625","https://openalex.org/W3006513224","https://openalex.org/W2046456988","https://openalex.org/W2357409937","https://openalex.org/W2510582230","https://openalex.org/W2978674666","https://openalex.org/W2074430941","https://openalex.org/W2113096305","https://openalex.org/W1977636359","https://openalex.org/W2772305933"],"abstract_inverted_index":{"To":[0],"overcome":[1],"the":[2,5,63,101,119,167,174],"burden":[3],"on":[4,29,83,146],"memory":[6],"size":[7,13],"and":[8,131,153,172],"bandwidth":[9],"due":[10],"to":[11,97,110,114,155,165],"ever-increasing":[12],"of":[14,54,77,85,104,125],"large":[15],"language":[16],"models":[17],"(LLMs),":[18],"aggressive":[19],"weight":[20],"quantization":[21,59],"has":[22],"been":[23],"recently":[24],"studied,":[25],"while":[26,67,107],"lacking":[27],"research":[28],"quantizing":[30],"activations.":[31],"In":[32,140],"this":[33],"paper,":[34],"we":[35,117,162],"present":[36,118],"a":[37,56,160],"hardware-software":[38],"co-design":[39],"method":[40,60],"that":[41,61,92,123,149],"results":[42],"in":[43,100],"an":[44,105],"energy-efficient":[45],"LLM":[46],"accelerator,":[47],"named":[48],"OPAL,":[49],"for":[50,95,128,135],"generation":[51],"tasks.":[52],"First":[53],"all,":[55],"novel":[57],"activation":[58],"leverages":[62],"microscaling":[64],"data":[65],"format":[66],"preserving":[68,86],"several":[69],"outliers":[70,130],"per":[71],"subtensor":[72],"block":[73,103],"(e.g.,":[74],"four":[75],"out":[76],"128":[78],"elements)":[79],"is":[80,90],"proposed.":[81],"Second,":[82],"top":[84],"outliers,":[87],"mixed":[88],"precision":[89],"utilized":[91],"sets":[93],"5-bit":[94],"inputs":[96,109],"sensitive":[98,112],"layers":[99,113],"decoder":[102],"LLM,":[106],"keeping":[108],"less":[111],"3-bit.":[115],"Finally,":[116],"OPAL":[120,142],"hardware":[121],"architecture":[122],"consists":[124],"FP":[126],"units":[127],"handling":[129],"vectorized":[132],"INT":[133],"multipliers":[134],"dominant":[136],"non-outlier":[137],"related":[138],"operations.":[139],"addition,":[141],"uses":[143],"log2-based":[144],"approximation":[145],"softmax":[147],"operations":[148],"only":[150],"requires":[151],"shift":[152],"subtraction":[154],"maximize":[156],"power":[157],"efficiency.":[158],"As":[159],"result,":[161],"are":[163],"able":[164],"improve":[166],"energy":[168],"efficiency":[169],"by":[170,176],"1.6~2.2\u00d7,":[171],"reduce":[173],"area":[175],"2.4~3.1\u00d7":[177],"with":[178],"negligible":[179],"accuracy":[180],"loss,":[181],"i.e.,":[182],"<1":[183],"perplexity":[184],"increase.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
