{"id":"https://openalex.org/W4409641050","doi":"https://doi.org/10.1109/jetcas.2025.3562937","title":"Adaptive Two-Range Quantization and Hardware Co-Design for Large Language Model Acceleration","display_name":"Adaptive Two-Range Quantization and Hardware Co-Design for Large Language Model Acceleration","publication_year":2025,"publication_date":"2025-04-21","ids":{"openalex":"https://openalex.org/W4409641050","doi":"https://doi.org/10.1109/jetcas.2025.3562937"},"language":"en","primary_location":{"id":"doi:10.1109/jetcas.2025.3562937","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jetcas.2025.3562937","pdf_url":null,"source":{"id":"https://openalex.org/S142323794","display_name":"IEEE Journal on Emerging and Selected Topics in Circuits and Systems","issn_l":"2156-3357","issn":["2156-3357","2156-3365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal on Emerging and Selected Topics in Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103263156","display_name":"Siqi Cai","orcid":"https://orcid.org/0000-0003-1478-2202"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Siqi Cai","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006070507","display_name":"Gang Wang","orcid":"https://orcid.org/0009-0003-6944-2958"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Wang","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100408977","display_name":"Wenjie Li","orcid":"https://orcid.org/0000-0002-1244-7657"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjie Li","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066692868","display_name":"Dongxu Lyu","orcid":"https://orcid.org/0000-0001-6826-2670"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongxu Lyu","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054897331","display_name":"Guanghui He","orcid":"https://orcid.org/0000-0002-0486-6421"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanghui He","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","AI Institute, School of Electronic Information and Electrical Engineering and the MoE Key Laboratory of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"AI Institute, School of Electronic Information and Electrical Engineering and the MoE Key Laboratory of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5103263156"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":2.3568,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.88705373,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"15","issue":"2","first_page":"272","last_page":"284"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7598999738693237,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7598999738693237,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.7307000160217285,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14025","display_name":"Educational Technology and Assessment","score":0.7085000276565552,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.5764096975326538},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5706309080123901},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5481139421463013},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.49991655349731445},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.48915985226631165},{"id":"https://openalex.org/keywords/dynamic-range","display_name":"Dynamic range","score":0.4321584105491638},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.3560626208782196},{"id":"https://openalex.org/keywords/electronic-engineering","display_name":"Electronic engineering","score":0.34181633591651917},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.3315947353839874},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.29568910598754883},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.2761968672275543},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.24198535084724426},{"id":"https://openalex.org/keywords/aerospace-engineering","display_name":"Aerospace engineering","score":0.15813371539115906},{"id":"https://openalex.org/keywords/quantum-mechanics","display_name":"Quantum mechanics","score":0.11520156264305115},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10957944393157959}],"concepts":[{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.5764096975326538},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5706309080123901},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5481139421463013},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49991655349731445},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.48915985226631165},{"id":"https://openalex.org/C87133666","wikidata":"https://www.wikidata.org/wiki/Q1161699","display_name":"Dynamic range","level":2,"score":0.4321584105491638},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.3560626208782196},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.34181633591651917},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.3315947353839874},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.29568910598754883},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2761968672275543},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.24198535084724426},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.15813371539115906},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.11520156264305115},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10957944393157959}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jetcas.2025.3562937","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jetcas.2025.3562937","pdf_url":null,"source":{"id":"https://openalex.org/S142323794","display_name":"IEEE Journal on Emerging and Selected Topics in Circuits and Systems","issn_l":"2156-3357","issn":["2156-3357","2156-3365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal on Emerging and Selected Topics in Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5227578763","display_name":null,"funder_award_id":"92464302","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5739802379","display_name":null,"funder_award_id":"62074097","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1674493795","https://openalex.org/W2093647425","https://openalex.org/W2883920103","https://openalex.org/W2890894339","https://openalex.org/W2891004123","https://openalex.org/W3092209569","https://openalex.org/W3100985894","https://openalex.org/W3156631773","https://openalex.org/W3212727748","https://openalex.org/W4206223617","https://openalex.org/W4308083739","https://openalex.org/W4366341968","https://openalex.org/W4385245566","https://openalex.org/W4390871466","https://openalex.org/W4393305455","https://openalex.org/W4402670433","https://openalex.org/W4404133677","https://openalex.org/W6727099177","https://openalex.org/W6746698991","https://openalex.org/W6749838110","https://openalex.org/W6769627184","https://openalex.org/W6778883912","https://openalex.org/W6799009007","https://openalex.org/W6811340617","https://openalex.org/W6842258392","https://openalex.org/W6846164622","https://openalex.org/W6847478871","https://openalex.org/W6850625674","https://openalex.org/W6851828392"],"related_works":["https://openalex.org/W2565094479","https://openalex.org/W2390829436","https://openalex.org/W1989791859","https://openalex.org/W602859758","https://openalex.org/W1971289376","https://openalex.org/W2379101322","https://openalex.org/W1992553864","https://openalex.org/W2776207444","https://openalex.org/W2146872326","https://openalex.org/W3158825072"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"face":[4],"high":[5],"computational":[6],"and":[7,37,56,81,92,106,130,137,169,177,180],"memory":[8,18,24,90],"demands.":[9],"While":[10],"prior":[11],"studies":[12],"have":[13],"leveraged":[14],"quantization":[15,27,35,54,69,94],"to":[16,78,112,166],"reduce":[17,113],"requirements,":[19],"critical":[20],"challenges":[21],"persist:":[22],"unaligned":[23],"accesses,":[25],"significant":[26],"errors":[28],"when":[29],"handling":[30],"outliers":[31,80],"that":[32,146,157],"span":[33],"larger":[34],"ranges,":[36,86],"the":[38,114,135,148,158,185],"increased":[39],"hardware":[40,57,98,115],"overhead":[41,116,149],"associated":[42,117,150],"with":[43,71,118,151,188],"processing":[44,109],"high-bit-width":[45,119],"outliers.":[46],"To":[47,126],"address":[48],"these":[49],"issues,":[50],"we":[51,100,140],"propose":[52],"a":[53,65,97,102,142],"algorithm":[55],"architecture":[58],"co-design":[59],"for":[60],"efficient":[61],"LLM":[62,174],"acceleration.":[63],"Algorithmically,":[64],"grouped":[66],"adaptive":[67],"two-range":[68],"(ATRQ)":[70],"an":[72,107],"in-group":[73],"embedded":[74],"identifier":[75],"is":[76],"proposed":[77],"encode":[79],"normal":[82],"values":[83],"in":[84,173,184],"distinct":[85],"achieving":[87,164],"hardware-friendly":[88],"aligned":[89],"access":[91],"reducing":[93],"errors.":[95],"From":[96],"perspective,":[99],"develop":[101],"low-overhead":[103],"ATRQ":[104],"decoder":[105],"outlier-bitsplit":[108],"element":[110],"(PE)":[111],"outliers,":[120],"effectively":[121],"leveraging":[122],"their":[123],"inherent":[124],"sparsity.":[125],"support":[127],"mixed-precision":[128],"computation":[129],"accommodate":[131],"diverse":[132],"dataflows":[133],"during":[134],"prefilling":[136,175],"decoding":[138,186],"phases,":[139],"design":[141],"reconfigurable":[143],"local":[144],"accumulator":[145],"mitigates":[147],"additional":[152],"adders.":[153],"Experimental":[154],"results":[155],"show":[156],"ATRQ-based":[159],"accelerator":[160],"outperforms":[161],"existing":[162],"solutions,":[163],"up":[165],"2.48\u00d7":[167],"speedup":[168,179],"2.01\u00d7":[170],"energy":[171,182],"reduction":[172,183],"phase,":[176,187],"1.87\u00d7":[178],"2.03\u00d7":[181],"superior":[189],"model":[190],"performance":[191],"under":[192],"post-training":[193],"quantization.":[194]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
