{"id":"https://openalex.org/W4409150022","doi":"https://doi.org/10.1145/3690624.3709287","title":"SEPTQ: A Simple and Effective Post-Training Quantization Paradigm for Large Language Models","display_name":"SEPTQ: A Simple and Effective Post-Training Quantization Paradigm for Large Language Models","publication_year":2025,"publication_date":"2025-04-04","ids":{"openalex":"https://openalex.org/W4409150022","doi":"https://doi.org/10.1145/3690624.3709287"},"language":"en","primary_location":{"id":"doi:10.1145/3690624.3709287","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3690624.3709287","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2604.10091","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029498141","display_name":"Han Liu","orcid":"https://orcid.org/0000-0001-6921-2050"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Han Liu","raw_affiliation_strings":["Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Haotian Gao","orcid":"https://orcid.org/0009-0008-5333-2769"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haotian Gao","raw_affiliation_strings":["Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100403816","display_name":"Xiaotong Zhang","orcid":"https://orcid.org/0000-0002-5013-8476"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaotong Zhang","raw_affiliation_strings":["Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109695129","display_name":"Changya Li","orcid":null},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changya Li","raw_affiliation_strings":["Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102424079","display_name":"Feng Zhang","orcid":"https://orcid.org/0000-0002-8373-9366"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Zhang","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100391896","display_name":"Wei Wang","orcid":"https://orcid.org/0000-0002-1717-5785"},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]},{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Wang","raw_affiliation_strings":["Shenzhen MSU-BIT University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen MSU-BIT University, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380","https://openalex.org/I4388482657"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001030192","display_name":"Fenglong Ma","orcid":"https://orcid.org/0000-0002-4999-0303"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fenglong Ma","raw_affiliation_strings":["The Pennsylvania State University, University Park, USA"],"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, University Park, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102728606","display_name":"Hong Yu","orcid":"https://orcid.org/0000-0003-4807-1812"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Yu","raw_affiliation_strings":["Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5029498141"],"corresponding_institution_ids":["https://openalex.org/I27357992"],"apc_list":null,"apc_paid":null,"fwci":2.271,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.88173036,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"812","last_page":"823"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9717000126838684,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9717000126838684,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9495999813079834,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7257739305496216},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.6848782896995544},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5763950347900391},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4421626925468445},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43447160720825195},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.41511625051498413},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3307405114173889},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24320775270462036}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7257739305496216},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.6848782896995544},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5763950347900391},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4421626925468445},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43447160720825195},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.41511625051498413},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3307405114173889},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24320775270462036},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3690624.3709287","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3690624.3709287","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2604.10091","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2604.10091","pdf_url":"https://arxiv.org/pdf/2604.10091","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2604.10091","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10091","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2604.10091","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2604.10091","pdf_url":"https://arxiv.org/pdf/2604.10091","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.5099999904632568,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W569478347","https://openalex.org/W2946609015","https://openalex.org/W2981852735","https://openalex.org/W2998617917","https://openalex.org/W3194676777","https://openalex.org/W4221138270","https://openalex.org/W4288089799","https://openalex.org/W6600168311"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2382521049","https://openalex.org/W2233261550","https://openalex.org/W1585007175","https://openalex.org/W2144385241","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W4300101996","https://openalex.org/W2165950148"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,27,198],"(LLMs)":[3],"have":[4],"shown":[5],"remarkable":[6],"performance":[7,93],"in":[8,57,74,127,136,204,217],"various":[9,85,192],"domains,":[10],"but":[11],"they":[12],"are":[13],"constrained":[14],"by":[15],"massive":[16],"computational":[17],"and":[18,44,61,89,108,131,154,182,186],"storage":[19],"costs.":[20],"Quantization,":[21],"an":[22],"effective":[23,109],"technique":[24],"for":[25,65,113,124],"compressing":[26],"to":[28,152,202],"fit":[29],"resource-limited":[30],"devices":[31],"while":[32],"preserving":[33],"generative":[34],"quality,":[35],"encompasses":[36],"two":[37,180],"primary":[38],"methods:":[39],"quantization":[40,46,76,97,111,134,176,206,219],"aware":[41],"training":[42,59],"(QAT)":[43],"post-training":[45,110,175],"(PTQ).":[47],"QAT":[48],"involves":[49],"additional":[50],"retraining":[51],"or":[52],"fine-tuning,":[53],"thus":[54],"inevitably":[55],"resulting":[56],"high":[58],"cost":[60],"making":[62],"it":[63,142],"unsuitable":[64],"LLMs.":[66],"Consequently,":[67],"PTQ":[68,80],"has":[69],"become":[70],"the":[71,101,121,128,133,144,149,156,161,174,184],"research":[72],"hotspot":[73],"recent":[75],"methods.":[77],"However,":[78],"existing":[79],"methods":[81],"usually":[82],"rely":[83],"on":[84,191],"complex":[86],"computation":[87],"procedures":[88],"suffer":[90],"from":[91,200],"considerable":[92],"degradation":[94],"under":[95],"low-bit":[96,218],"settings.":[98],"To":[99],"alleviate":[100],"above":[102],"issues,":[103],"we":[104],"propose":[105],"a":[106,137,195],"simple":[107],"paradigm":[112],"LLMs,":[114],"named":[115],"SEPTQ.":[116],"Specifically,":[117],"SEPTQ":[118,172,210],"first":[119],"calculates":[120],"importance":[122],"score":[123],"each":[125],"element":[126],"weight":[129,164],"matrix":[130,146,165],"determines":[132],"locations":[135,151],"static":[138],"global":[139],"manner.":[140],"Then":[141],"utilizes":[143],"mask":[145],"which":[147],"represents":[148],"important":[150],"quantize":[153],"update":[155],"associated":[157],"weights":[158],"column-by-column":[159],"until":[160],"appropriate":[162],"quantized":[163],"is":[166],"obtained.":[167],"Compared":[168],"with":[169],"previous":[170],"methods,":[171],"simplifies":[173],"procedure":[177],"into":[178],"only":[179],"steps,":[181],"considers":[183],"effectiveness":[185],"efficiency":[187],"simultaneously.":[188],"Experimental":[189],"results":[190],"datasets":[193],"across":[194],"suite":[196],"of":[197],"ranging":[199],"millions":[201],"billions":[203],"different":[205],"bit-levels":[207],"demonstrate":[208],"that":[209],"significantly":[211],"outperforms":[212],"other":[213],"strong":[214],"baselines,":[215],"especially":[216],"scenarios.":[220]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
