{"id":"https://openalex.org/W4409248600","doi":"https://doi.org/10.1109/hpca61900.2025.00086","title":"M-ANT: Efficient Low-bit Group Quantization for LLMs via Mathematically Adaptive Numerical Type","display_name":"M-ANT: Efficient Low-bit Group Quantization for LLMs via Mathematically Adaptive Numerical Type","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4409248600","doi":"https://doi.org/10.1109/hpca61900.2025.00086"},"language":"en","primary_location":{"id":"doi:10.1109/hpca61900.2025.00086","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00086","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101721549","display_name":"Weiming Hu","orcid":"https://orcid.org/0009-0003-5115-0498"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weiming Hu","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087284551","display_name":"Haoyan Zhang","orcid":"https://orcid.org/0000-0003-3067-0710"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyan Zhang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101706991","display_name":"Cong Guo","orcid":"https://orcid.org/0000-0002-4479-5525"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cong Guo","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101554524","display_name":"Feng Yu","orcid":"https://orcid.org/0000-0002-8898-6120"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Feng","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Renyang Guan","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Renyang Guan","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112561881","display_name":"Zhendong Hua","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhendong Hua","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100623100","display_name":"Zihan Liu","orcid":"https://orcid.org/0000-0002-0874-0682"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihan Liu","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114626347","display_name":"Yue Guan","orcid":"https://orcid.org/0000-0001-7773-4588"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Guan","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100959121","display_name":"Minyi Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minyi Guo","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003939279","display_name":"Jingwen Leng","orcid":"https://orcid.org/0000-0002-5660-5493"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingwen Leng","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5101721549"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":13.4227,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.99034031,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1112","last_page":"1126"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10522","display_name":"Medical Imaging Techniques and Applications","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10522","display_name":"Medical Imaging Techniques and Applications","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11993","display_name":"Atomic and Subatomic Physics Research","score":0.9625999927520752,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12334","display_name":"Lung Cancer Research Studies","score":0.961899995803833,"subfield":{"id":"https://openalex.org/subfields/2730","display_name":"Oncology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ant","display_name":"ANT","score":0.6560755372047424},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6086931228637695},{"id":"https://openalex.org/keywords/bit","display_name":"Bit (key)","score":0.565804660320282},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.51472407579422},{"id":"https://openalex.org/keywords/type","display_name":"Type (biology)","score":0.5023996829986572},{"id":"https://openalex.org/keywords/group","display_name":"Group (periodic table)","score":0.487310528755188},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.39115405082702637},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.270754873752594},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.26552361249923706},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.12474378943443298},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.11103770136833191},{"id":"https://openalex.org/keywords/quantum-mechanics","display_name":"Quantum mechanics","score":0.10902836918830872},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1031719446182251},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.07407650351524353}],"concepts":[{"id":"https://openalex.org/C97467695","wikidata":"https://www.wikidata.org/wiki/Q295802","display_name":"ANT","level":2,"score":0.6560755372047424},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6086931228637695},{"id":"https://openalex.org/C117011727","wikidata":"https://www.wikidata.org/wiki/Q1278488","display_name":"Bit (key)","level":2,"score":0.565804660320282},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.51472407579422},{"id":"https://openalex.org/C2777299769","wikidata":"https://www.wikidata.org/wiki/Q3707858","display_name":"Type (biology)","level":2,"score":0.5023996829986572},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.487310528755188},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.39115405082702637},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.270754873752594},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.26552361249923706},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.12474378943443298},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.11103770136833191},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.10902836918830872},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1031719446182251},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.07407650351524353},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca61900.2025.00086","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00086","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.47999998927116394,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322999","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":86,"referenced_works":["https://openalex.org/W1979527452","https://openalex.org/W2963148663","https://openalex.org/W2963339397","https://openalex.org/W2963367920","https://openalex.org/W2998183051","https://openalex.org/W3034887213","https://openalex.org/W3034940165","https://openalex.org/W3043504674","https://openalex.org/W3092319711","https://openalex.org/W3100083812","https://openalex.org/W3100985894","https://openalex.org/W3102510044","https://openalex.org/W3115555382","https://openalex.org/W3132616766","https://openalex.org/W3174529902","https://openalex.org/W3187908937","https://openalex.org/W3201174429","https://openalex.org/W3205706264","https://openalex.org/W4214512541","https://openalex.org/W4214686755","https://openalex.org/W4226109952","https://openalex.org/W4242577057","https://openalex.org/W4280546523","https://openalex.org/W4280633999","https://openalex.org/W4281660701","https://openalex.org/W4308083739","https://openalex.org/W4318541535","https://openalex.org/W4360831828","https://openalex.org/W4360831846","https://openalex.org/W4366341968","https://openalex.org/W4380874786","https://openalex.org/W4385245566","https://openalex.org/W4387321091","https://openalex.org/W4389518760","https://openalex.org/W4391827186","https://openalex.org/W4394998892","https://openalex.org/W4395111944","https://openalex.org/W4401211807","https://openalex.org/W4402671766","https://openalex.org/W4404132964","https://openalex.org/W6677103964","https://openalex.org/W6677580257","https://openalex.org/W6696405057","https://openalex.org/W6696798448","https://openalex.org/W6719768283","https://openalex.org/W6720242923","https://openalex.org/W6727099177","https://openalex.org/W6745245109","https://openalex.org/W6755207826","https://openalex.org/W6788175385","https://openalex.org/W6790521546","https://openalex.org/W6810081322","https://openalex.org/W6810610777","https://openalex.org/W6811340617","https://openalex.org/W6838633097","https://openalex.org/W6842258392","https://openalex.org/W6842527641","https://openalex.org/W6846164622","https://openalex.org/W6847118041","https://openalex.org/W6847478871","https://openalex.org/W6848451824","https://openalex.org/W6850625674","https://openalex.org/W6850927664","https://openalex.org/W6852927819","https://openalex.org/W6853048723","https://openalex.org/W6853192989","https://openalex.org/W6853251322","https://openalex.org/W6853804809","https://openalex.org/W6854866820","https://openalex.org/W6856696905","https://openalex.org/W6857288518","https://openalex.org/W6857799723","https://openalex.org/W6860936625","https://openalex.org/W6861393904","https://openalex.org/W6861839547","https://openalex.org/W6862025885","https://openalex.org/W6862187050","https://openalex.org/W6862909604","https://openalex.org/W6864878861","https://openalex.org/W6864879674","https://openalex.org/W6866336322","https://openalex.org/W6872946721","https://openalex.org/W6873261103","https://openalex.org/W6875513029","https://openalex.org/W6893640197","https://openalex.org/W7052104438"],"related_works":["https://openalex.org/W2253992237","https://openalex.org/W624076816","https://openalex.org/W849430572","https://openalex.org/W4327546585","https://openalex.org/W2285878319","https://openalex.org/W594861393","https://openalex.org/W2991307080","https://openalex.org/W3177044606","https://openalex.org/W2411923897","https://openalex.org/W2244036508"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"are":[4,63],"one":[5],"of":[6,31,119,150],"the":[7,43,51,59,80,112,173,182,234,243,269],"most":[8],"important":[9],"killer":[10],"computer":[11],"applications.":[12],"The":[13],"recent":[14],"algorithmic":[15],"advancement":[16],"proposes":[17],"a":[18,26,34,37,93,116,135,141,147,168,209,220,229],"fine-grained":[19],"group-wise":[20,235],"quantization":[21,204,222,240],"for":[22,84,121,177,193],"LLMs,":[23],"which":[24,122],"treats":[25],"small":[27,103],"set":[28],"(e.g.,":[29],"64)":[30],"values":[32],"in":[33,188],"tensor":[35],"as":[36],"compression":[38],"unit.":[39,223],"It":[40],"effectively":[41],"preserves":[42],"model":[44],"accuracy":[45],"without":[46],"retraining,":[47],"and":[48,77,153,218,237,241,259],"has":[49],"become":[50],"standard":[52],"approach":[53],"to":[54,71,74,128,159,171,214,255,263,268],"efficiently":[55,215],"deploy":[56],"LLMs.":[57,85],"On":[58],"other":[60],"hand,":[61],"there":[62],"works":[64],"that":[65,96],"propose":[66,133,200],"various":[67],"adaptive":[68,124,137],"data":[69,125,151,175],"types":[70,126],"better":[72],"adapt":[73],"different":[75,98,108],"distributions":[76],"further":[78,191],"reduce":[79],"required":[81],"bit":[82],"length":[83],"In":[86],"this":[87,130],"work,":[88],"our":[89],"detailed":[90],"analysis":[91],"unveils":[92],"key":[94],"finding":[95],"while":[97],"tensors":[99],"exhibit":[100],"similar":[101],"distributions,":[102],"groups":[104],"can":[105],"have":[106],"markedly":[107],"distributions.":[109],"As":[110],"such,":[111],"group-level":[113],"diversity":[114],"requires":[115],"new":[117],"level":[118],"adaptivity":[120],"existing":[123],"fail":[127],"provide.In":[129],"paper,":[131],"we":[132,166,199,207],"MANT,":[134,165],"mathematically":[136],"numeric":[138],"type,":[139],"featuring":[140],"more":[142,154],"flexible":[143],"encoding":[144],"paradigm":[145],"with":[146],"wider":[148],"range":[149],"distribution":[152],"efficient":[155,202],"decoding-computation":[156],"fusion":[157],"mechanism":[158],"address":[160],"these":[161,226],"challenges.":[162,245],"Based":[163],"on":[164,250],"develop":[167],"supporting":[169],"framework":[170],"assign":[172],"appropriate":[174],"type":[176],"each":[178],"group":[179],"adaptively.":[180],"Meanwhile,":[181],"dynamically":[183],"generated":[184],"Key-Value":[185],"(KV)":[186],"caches":[187],"LLMs":[189],"introduce":[190],"complexity":[192],"real-time":[194,203,221],"quantization.":[195],"To":[196],"tackle":[197],"this,":[198],"an":[201],"mechanism.":[205],"Besides,":[206],"implement":[208],"specific":[210],"processing":[211],"element":[212],"(PE)":[213],"support":[216],"MANT":[217,232],"incorporate":[219],"By":[224],"integrating":[225],"components":[227],"into":[228],"systolic":[230],"array,":[231],"unifies":[233],"weight":[236],"KV":[238],"cache":[239],"addresses":[242],"associated":[244],"Our":[246],"evaluation":[247],"shows":[248],"achieving,":[249],"average,":[251],"2.99":[252],"\u00d7":[253,261],"(up":[254,262],"4.46":[256],"\u00d7)":[257,265],"speedup":[258],"2.81":[260],"4.10":[264],"energy":[266],"reduction":[267],"state-of-the-art":[270],"LLM":[271],"accelerator.":[272]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":7}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
