{"id":"https://openalex.org/W4413141157","doi":"https://doi.org/10.1109/tpami.2025.3599479","title":"LR <i>Quant+</i> : A Unified and Learnable Framework to Post-Training Quantization for Transformer-Based Large Foundation Models","display_name":"LR <i>Quant+</i> : A Unified and Learnable Framework to Post-Training Quantization for Transformer-Based Large Foundation Models","publication_year":2025,"publication_date":"2025-08-14","ids":{"openalex":"https://openalex.org/W4413141157","doi":"https://doi.org/10.1109/tpami.2025.3599479","pmid":"https://pubmed.ncbi.nlm.nih.gov/40811157"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3599479","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3599479","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005368623","display_name":"Jiaqi Zhao","orcid":"https://orcid.org/0000-0002-9328-9768"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiaqi Zhao","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101586447","display_name":"Chao Zeng","orcid":"https://orcid.org/0000-0001-5110-545X"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Zeng","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100432583","display_name":"Ming Wang","orcid":"https://orcid.org/0000-0003-0976-9871"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Wang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023420255","display_name":"Linxuan Han","orcid":"https://orcid.org/0009-0001-6606-7406"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linxuan Han","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090850708","display_name":"Yuzhang Shang","orcid":null},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuzhang Shang","raw_affiliation_strings":["Illinois Institute of Technology, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology, Chicago, IL, USA","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100345524","display_name":"Jing Zhang","orcid":"https://orcid.org/0009-0008-1485-6025"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Miao Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038612499","display_name":"Liqiang Nie","orcid":"https://orcid.org/0000-0003-1476-0273"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liqiang Nie","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5005368623"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26155181,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"47","issue":"12","first_page":"11817","last_page":"11833"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10271","display_name":"Seismic Imaging and Inversion Techniques","score":0.9229000210762024,"subfield":{"id":"https://openalex.org/subfields/1908","display_name":"Geophysics"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10271","display_name":"Seismic Imaging and Inversion Techniques","score":0.9229000210762024,"subfield":{"id":"https://openalex.org/subfields/1908","display_name":"Geophysics"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9111999869346619,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7617800235748291},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6149539351463318},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.4983546733856201},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.4215097427368164},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.4072793126106262},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18718433380126953},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.15541201829910278},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09369376301765442},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.07310751080513}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7617800235748291},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6149539351463318},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.4983546733856201},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.4215097427368164},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.4072793126106262},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18718433380126953},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.15541201829910278},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09369376301765442},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.07310751080513},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3599479","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3599479","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:40811157","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40811157","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W1984685202","https://openalex.org/W2093647425","https://openalex.org/W2108591274","https://openalex.org/W2169384417","https://openalex.org/W2194775991","https://openalex.org/W2946609015","https://openalex.org/W2963518342","https://openalex.org/W2963622213","https://openalex.org/W2979382951","https://openalex.org/W2998617917","https://openalex.org/W3034368386","https://openalex.org/W3194676777","https://openalex.org/W4285601701","https://openalex.org/W4304080501","https://openalex.org/W4313069943","https://openalex.org/W4385245566","https://openalex.org/W4389523832","https://openalex.org/W4389524393","https://openalex.org/W4390873673","https://openalex.org/W4402670692","https://openalex.org/W4402684165","https://openalex.org/W4402727885","https://openalex.org/W4403081466","https://openalex.org/W4406650295","https://openalex.org/W4407857344","https://openalex.org/W4412886641","https://openalex.org/W4412887764","https://openalex.org/W4415796210"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2381393187","https://openalex.org/W2332779545","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W2358060160","https://openalex.org/W2035483685"],"abstract_inverted_index":{"Post-training":[0],"quantization":[1,41,45,62,128,158,176,182,300],"(PTQ)":[2],"for":[3,42,79,90,122,291],"transformer-based":[4],"large":[5],"foundation":[6],"models":[7],"(LFMs)":[8],"significantly":[9],"accelerates":[10],"model":[11,19],"inference":[12],"and":[13,102,117,126,152,184,205,235,238,269,294,304,309],"relieves":[14],"memory":[15],"constraints,":[16],"without":[17],"incurring":[18],"training.":[20],"However,":[21],"existing":[22],"methods":[23,75],"face":[24],"three":[25],"main":[26],"issues:":[27],"1)":[28],"The":[29,58],"scaling":[30,143],"factors,":[31],"which":[32,51,93,145,263],"are":[33,47,76,146],"commonly":[34],"used":[35],"in":[36],"scale":[37],"reparameterization":[38],"based":[39,124,191],"weight-activation":[40,293],"mitigating":[43],"the":[44,68,181,193,211,254,266,276],"errors,":[46],"mostly":[48],"hand-crafted":[49],"defined":[50,64],"may":[52],"lead":[53,173],"to":[54,140,174,232,244],"suboptimal":[55],"results;":[56],"2)":[57],"formulation":[59],"of":[60,95,157,196,203,213,278],"current":[61],"error":[63,183,267],"by":[65,148],"L2-norm":[66],"ignores":[67],"directional":[69,248],"shifts":[70],"after":[71],"quantization;":[72],"3)":[73],"Most":[74],"devised":[77],"tailored":[78],"single":[80],"scenario,":[81],"i.e.,":[82,302],"only":[83,88,166],"evaluated":[84],"on":[85,99,168,192,306],"LLMs":[86],"or":[87],"designed":[89],"weight-only":[91,295],"quantization,":[92,296],"lacks":[94],"a":[96,103,114,187,219,228,259],"comprehensive":[97],"evaluation":[98],"diverse":[100],"benchmarks":[101],"broad":[104],"application":[105],"scope.":[106],"To":[107,208],"address":[108],"these":[109],"challenges,":[110],"this":[111],"paper":[112],"introduces":[113],"unified":[115,286],"Learnable":[116],"Robust":[118],"post-training":[119],"Quantization":[120],"framework":[121,257],"transformer":[123],"LFMs":[125,290],"various":[127,289],"scenarios,":[129,301],"called":[130],"LRQuant.":[131],"First,":[132],"we":[133,162,179,217,225,252],"consider":[134],"an":[135],"efficient":[136],"block-wise":[137,255],"learnable":[138,215,241],"paradigm":[139],"find":[141,164],"optimal":[142,175],"factors":[144],"initialized":[147],"logarithmic":[149],"activation":[150],"equivalent":[151],"get":[153],"suitable":[154],"clipping":[155],"range":[156],"steps.":[159],"In":[160,250],"addition,":[161,251],"empirically":[163],"that":[165],"relying":[167],"MSE":[169,234],"loss":[170,189],"could":[171],"hardly":[172],"results,":[177],"so":[178],"reformulate":[180],"then":[185,239],"propose":[186,218,227],"novel":[188,260],"function":[190],"negative":[194],"logarithm":[195],"cosine":[197],"similarity":[198],"(NLC":[199],"loss)":[200],"between":[201],"outputs":[202],"full-precision":[204],"quantized":[206],"block.":[207],"fully":[209],"investigate":[210],"potentiality":[212],"our":[214,279],"paradigm,":[216],"more":[220],"superior":[221],"version":[222],"LRQuant+.":[223],"Specifically,":[224],"first":[226],"dynamically":[229],"weighted":[230],"scheme":[231],"balance":[233],"NLC":[236],"loss,":[237],"devise":[240],"rotation":[242],"vectors":[243],"further":[245],"directly":[246],"reduce":[247],"gaps.":[249],"improve":[253],"optimization":[256],"into":[258],"two-branch":[261],"nature":[262],"jointly":[264],"considers":[265],"propagation":[268],"homologous":[270],"reconstruction":[271],"error.":[272],"Extensive":[273],"experiments":[274],"demonstrate":[275],"superiority":[277],"LRQuantand":[280],"LRQuant+,":[281],"as":[282,284],"well":[283],"their":[285],"effectiveness":[287],"across":[288],"both":[292],"especially":[297],"under":[298],"challenging":[299],"W4A4":[303],"W2A16":[305],"LLMs,":[307],"ViTS,":[308],"MLLMs.":[310]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
