{"id":"https://openalex.org/W4408399636","doi":"https://doi.org/10.1109/tcsi.2025.3547732","title":"OFQ-LLM: Outlier-Flexing Quantization for Efficient Low-Bit Large Language Model Acceleration","display_name":"OFQ-LLM: Outlier-Flexing Quantization for Efficient Low-Bit Large Language Model Acceleration","publication_year":2025,"publication_date":"2025-03-13","ids":{"openalex":"https://openalex.org/W4408399636","doi":"https://doi.org/10.1109/tcsi.2025.3547732"},"language":"en","primary_location":{"id":"doi:10.1109/tcsi.2025.3547732","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsi.2025.3547732","pdf_url":null,"source":{"id":"https://openalex.org/S116977442","display_name":"IEEE Transactions on Circuits and Systems I Regular Papers","issn_l":"1549-8328","issn":["1549-8328","1558-0806"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems I: Regular Papers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006070507","display_name":"Gang Wang","orcid":"https://orcid.org/0009-0003-6944-2958"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Gang Wang","raw_affiliation_strings":["State Key Laboratory of MicroNano Engineering Science, School of Integrated Circuits, Shanghai Jiao Tong University, Shanghai, China","School of Integrated Circuits, State Key Laboratory of MicroNano Engineering Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of MicroNano Engineering Science, School of Integrated Circuits, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"School of Integrated Circuits, State Key Laboratory of MicroNano Engineering Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103263156","display_name":"Siqi Cai","orcid":"https://orcid.org/0000-0003-1478-2202"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siqi Cai","raw_affiliation_strings":["State Key Laboratory of MicroNano Engineering Science, School of Integrated Circuits, Shanghai Jiao Tong University, Shanghai, China","School of Integrated Circuits, State Key Laboratory of MicroNano Engineering Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of MicroNano Engineering Science, School of Integrated Circuits, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"School of Integrated Circuits, State Key Laboratory of MicroNano Engineering Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100408977","display_name":"Wenjie Li","orcid":"https://orcid.org/0000-0002-1244-7657"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjie Li","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066692868","display_name":"Dongxu Lyu","orcid":"https://orcid.org/0000-0001-6826-2670"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongxu Lyu","raw_affiliation_strings":["State Key Laboratory of MicroNano Engineering Science, School of Integrated Circuits, Shanghai Jiao Tong University, Shanghai, China","School of Integrated Circuits, State Key Laboratory of MicroNano Engineering Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of MicroNano Engineering Science, School of Integrated Circuits, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"School of Integrated Circuits, State Key Laboratory of MicroNano Engineering Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054897331","display_name":"Guanghui He","orcid":"https://orcid.org/0000-0002-0486-6421"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanghui He","raw_affiliation_strings":["State Key Laboratory of MicroNano Engineering Science, School of Integrated Circuits, Shanghai Jiao Tong University, Shanghai, China","School of Integrated Circuits, State Key Laboratory of MicroNano Engineering Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of MicroNano Engineering Science, School of Integrated Circuits, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"School of Integrated Circuits, State Key Laboratory of MicroNano Engineering Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5006070507"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":5.4326,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.94665803,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"72","issue":"8","first_page":"4077","last_page":"4090"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9101999998092651,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.706188440322876},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.6198652982711792},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5554102659225464},{"id":"https://openalex.org/keywords/bit","display_name":"Bit (key)","score":0.49425768852233887},{"id":"https://openalex.org/keywords/electronic-engineering","display_name":"Electronic engineering","score":0.4405769109725952},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.35847610235214233},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.327589750289917},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3184073567390442},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.22189423441886902},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.20511853694915771}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.706188440322876},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.6198652982711792},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5554102659225464},{"id":"https://openalex.org/C117011727","wikidata":"https://www.wikidata.org/wiki/Q1278488","display_name":"Bit (key)","level":2,"score":0.49425768852233887},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.4405769109725952},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.35847610235214233},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.327589750289917},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3184073567390442},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.22189423441886902},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.20511853694915771},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsi.2025.3547732","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsi.2025.3547732","pdf_url":null,"source":{"id":"https://openalex.org/S116977442","display_name":"IEEE Transactions on Circuits and Systems I Regular Papers","issn_l":"1549-8328","issn":["1549-8328","1558-0806"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems I: Regular Papers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5227578763","display_name":null,"funder_award_id":"92464302","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2093647425","https://openalex.org/W2883920103","https://openalex.org/W3100985894","https://openalex.org/W3175752238","https://openalex.org/W4308083739","https://openalex.org/W4366341968","https://openalex.org/W4385245566","https://openalex.org/W4388979610","https://openalex.org/W4390871466","https://openalex.org/W4393406920","https://openalex.org/W6727099177","https://openalex.org/W6769627184","https://openalex.org/W6772383348","https://openalex.org/W6778883912","https://openalex.org/W6782879696","https://openalex.org/W6811340617","https://openalex.org/W6842758154","https://openalex.org/W6846164622","https://openalex.org/W6847478871","https://openalex.org/W6850625674","https://openalex.org/W6850936240","https://openalex.org/W6851828392","https://openalex.org/W6852584927","https://openalex.org/W6853804809","https://openalex.org/W6854866820","https://openalex.org/W6856696905","https://openalex.org/W6856969750","https://openalex.org/W6857799723","https://openalex.org/W6861380575","https://openalex.org/W6861444185","https://openalex.org/W6862020115","https://openalex.org/W6877022392"],"related_works":["https://openalex.org/W4327546585","https://openalex.org/W2411923897","https://openalex.org/W4394546135","https://openalex.org/W4285347720","https://openalex.org/W4200259850","https://openalex.org/W2333831899","https://openalex.org/W2484894494","https://openalex.org/W2367385042","https://openalex.org/W4381186982","https://openalex.org/W2040781570"],"abstract_inverted_index":{"Large":[0],"Language":[1,11],"Models":[2],"(LLMs)":[3],"have":[4],"achieved":[5],"significant":[6],"success":[7],"in":[8,96,154,240,266],"various":[9],"Natural":[10],"Processing":[12],"(NLP)":[13],"tasks,":[14],"becoming":[15],"essential":[16],"to":[17,39,58,76,122,133,149,168,192,208,220,246],"modern":[18],"intelligent":[19],"computing.":[20],"Their":[21],"large":[22],"memory":[23],"footprint":[24],"and":[25,43,128,152,173,179,230,244,256],"high":[26,177,180],"computational":[27],"cost":[28],"hinder":[29],"efficient":[30],"deployment.":[31],"Post-Training":[32],"Quantization":[33],"(PTQ)":[34],"is":[35,88,166],"a":[36,97,144,157],"promising":[37],"technique":[38],"alleviate":[40],"this":[41,63],"issue":[42],"accelerate":[44,78,174],"LLM":[45,56,79,196,241,267],"inference.":[46],"However,":[47],"the":[48,53,104,139,170,202],"presence":[49],"of":[50,55,86],"outliers":[51],"impedes":[52],"advancement":[54],"quantization":[57,75],"lower":[59],"bit":[60,159],"levels.":[61,82],"In":[62],"paper,":[64],"we":[65,117,142],"introduce":[66,143],"OFQ-LLM,":[67],"an":[68],"algorithm-hardware":[69],"co-design":[70],"solution":[71],"that":[72,89,186],"adopts":[73],"outlier-flexing":[74,146],"efficiently":[77,94],"at":[80,156],"low-bit":[81,195],"The":[83,161],"key":[84],"insight":[85],"OFQ-LLM":[87,187],"normal":[90,126],"data":[91,100,127],"can":[92,108],"be":[93,109],"quantized":[95],"slightly":[98],"reduced":[99],"encoding":[101,106,147,171],"space,":[102],"while":[103],"rest":[105],"space":[107],"used":[110],"for":[111,125],"flexible":[112],"outlier":[113,130,136],"values.":[114,137],"During":[115],"quantization,":[116],"use":[118],"rescale-based":[119],"clipping":[120],"(RBC)":[121],"optimize":[123],"accuracy":[124,190],"group":[129],"clustering":[131],"(GOC)":[132],"flexibly":[134],"represent":[135],"At":[138],"hardware":[140,164],"level,":[141],"memory-aligned":[145],"scheme":[148,172],"encode":[150],"activations":[151],"weights":[153],"LLMs":[155,175],"low":[158],"level.":[160],"outlier-normal":[162],"mixed":[163],"architecture":[165],"devised":[167],"leverage":[169],"with":[176,270],"speed":[178,228,254],"energy":[181,217,238,264],"efficiency.":[182],"Our":[183],"experiments":[184],"show":[185],"achieves":[188],"better":[189],"compared":[191],"state-of-the-art":[193],"(SOTA)":[194],"PTQ":[197],"works.":[198],"OFQ-LLM-based":[199],"accelerator":[200],"surpasses":[201],"SOTA":[203],"outlier-aware":[204],"accelerators":[205],"by":[206],"up":[207,219,229,245,255],"<inline-formula":[209,221,231,247,257],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[210,222,232,248,258],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[211,223,233,249,259],"<tex-math":[212,224,234,250,260],"notation=\"LaTeX\">$2.69\\times":[213],"$":[214,226,236,252,262],"</tex-math></inline-formula>":[215,227,237,253,263],"core":[216],"efficiency,":[218],"notation=\"LaTeX\">$3.83\\times":[225],"notation=\"LaTeX\">$2.44\\times":[235],"reduction":[239,265],"prefilling":[242],"phase,":[243,269],"notation=\"LaTeX\">$2.01\\times":[251],"notation=\"LaTeX\">$2.88\\times":[261],"decoding":[268],"superior":[271],"accuracy.":[272]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
