{"id":"https://openalex.org/W4416252219","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228337","title":"Understanding the Difficulty of Low-Precision Post-Training Quantization for LLMs","display_name":"Understanding the Difficulty of Low-Precision Post-Training Quantization for LLMs","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416252219","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228337"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228337","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228337","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073664360","display_name":"Zifei Xu","orcid":"https://orcid.org/0000-0003-2661-517X"},"institutions":[{"id":"https://openalex.org/I4210158408","display_name":"Matrix Research (United States)","ror":"https://ror.org/04mw0p229","country_code":"US","type":"company","lineage":["https://openalex.org/I4210158408"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zifei Xu","raw_affiliation_strings":["d-Matrix,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"d-Matrix,Santa Clara,USA","institution_ids":["https://openalex.org/I4210158408"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061847219","display_name":"Sayeh Sharify","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158408","display_name":"Matrix Research (United States)","ror":"https://ror.org/04mw0p229","country_code":"US","type":"company","lineage":["https://openalex.org/I4210158408"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sayeh Sharify","raw_affiliation_strings":["d-Matrix,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"d-Matrix,Santa Clara,USA","institution_ids":["https://openalex.org/I4210158408"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048530115","display_name":"Wanzin Yazar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158408","display_name":"Matrix Research (United States)","ror":"https://ror.org/04mw0p229","country_code":"US","type":"company","lineage":["https://openalex.org/I4210158408"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wanzin Yazar","raw_affiliation_strings":["d-Matrix,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"d-Matrix,Santa Clara,USA","institution_ids":["https://openalex.org/I4210158408"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090000092","display_name":"Tristan J. Webb","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158408","display_name":"Matrix Research (United States)","ror":"https://ror.org/04mw0p229","country_code":"US","type":"company","lineage":["https://openalex.org/I4210158408"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tristan Webb","raw_affiliation_strings":["d-Matrix,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"d-Matrix,Santa Clara,USA","institution_ids":["https://openalex.org/I4210158408"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100327839","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0001-8246-0606"},"institutions":[{"id":"https://openalex.org/I4210158408","display_name":"Matrix Research (United States)","ror":"https://ror.org/04mw0p229","country_code":"US","type":"company","lineage":["https://openalex.org/I4210158408"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xin Wang","raw_affiliation_strings":["d-Matrix,Santa Clara,USA"],"affiliations":[{"raw_affiliation_string":"d-Matrix,Santa Clara,USA","institution_ids":["https://openalex.org/I4210158408"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5073664360"],"corresponding_institution_ids":["https://openalex.org/I4210158408"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1951769,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.17589999735355377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.17589999735355377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.13300000131130219,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07280000299215317,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.8658000230789185},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.7111999988555908},{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.557200014591217},{"id":"https://openalex.org/keywords/phenomenon","display_name":"Phenomenon","score":0.5059999823570251},{"id":"https://openalex.org/keywords/linde\u2013buzo\u2013gray-algorithm","display_name":"Linde\u2013Buzo\u2013Gray algorithm","score":0.3280999958515167}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.8658000230789185},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.7111999988555908},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.557200014591217},{"id":"https://openalex.org/C50335755","wikidata":"https://www.wikidata.org/wiki/Q483247","display_name":"Phenomenon","level":2,"score":0.5059999823570251},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4681999981403351},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46799999475479126},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4049000144004822},{"id":"https://openalex.org/C93372532","wikidata":"https://www.wikidata.org/wiki/Q6552455","display_name":"Linde\u2013Buzo\u2013Gray algorithm","level":3,"score":0.3280999958515167},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3192000091075897},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2996000051498413},{"id":"https://openalex.org/C164752517","wikidata":"https://www.wikidata.org/wiki/Q5570875","display_name":"Global optimization","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2662000060081482},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228337","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228337","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2160815625","https://openalex.org/W3166846774","https://openalex.org/W3174770825","https://openalex.org/W4205991051","https://openalex.org/W4367595583","https://openalex.org/W4386187806","https://openalex.org/W4393147284","https://openalex.org/W4406650295"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,127],"of":[3,88,97,111,118,125],"high":[4],"parameter":[5],"counts":[6],"are":[7],"computationally":[8],"expensive,":[9],"yet":[10],"can":[11,27],"be":[12,28],"made":[13],"much":[14],"more":[15],"efficient":[16],"by":[17,34,44],"compressing":[18],"their":[19],"weights":[20],"to":[21],"very":[22,80,129],"low":[23,130],"numerical":[24,77],"precision.":[25,131],"This":[26],"achieved":[29],"either":[30],"through":[31,41],"post-training":[32,89],"quantization":[33,38,90,113],"minimizing":[35,45],"local,":[36],"layer-wise":[37],"errors,":[39],"or":[40],"quantization-aware":[42,120],"fine-tuning":[43],"the":[46,57,61,69,76,98,116,123],"global":[47,101],"loss":[48],"function.":[49],"In":[50],"this":[51,86],"study,":[52],"we":[53],"discovered":[54],"that,":[55],"under":[56],"same":[58],"data":[59],"constraint,":[60],"former":[62],"approach":[63],"nearly":[64],"always":[65],"fared":[66],"worse":[67],"than":[68],"latter,":[70],"a":[71],"phenomenon":[72],"particularly":[73],"prominent":[74],"when":[75],"precision":[78],"is":[79],"low.":[81],"We":[82],"further":[83],"showed":[84],"that":[85],"difficulty":[87],"arose":[91],"from":[92],"stark":[93],"misalignment":[94],"between":[95],"optimization":[96],"local":[99,112],"and":[100,115],"objective":[102],"functions.":[103],"Our":[104],"findings":[105],"suggested":[106],"limited":[107],"utility":[108],"in":[109,122],"minimization":[110],"error":[114],"importance":[117],"direct":[119],"fine-tuning,":[121],"regime":[124],"large":[126],"at":[128]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
