{"id":"https://openalex.org/W4387158310","doi":"https://doi.org/10.48550/arxiv.2309.15531","title":"Rethinking Channel Dimensions to Isolate Outliers for Low-bit Weight Quantization of Large Language Models","display_name":"Rethinking Channel Dimensions to Isolate Outliers for Low-bit Weight Quantization of Large Language Models","publication_year":2023,"publication_date":"2023-09-27","ids":{"openalex":"https://openalex.org/W4387158310","doi":"https://doi.org/10.48550/arxiv.2309.15531"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2309.15531","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.15531","pdf_url":"https://arxiv.org/pdf/2309.15531","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2309.15531","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056065406","display_name":"Jung Hwan Heo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Heo, Jung Hwan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100762110","display_name":"Jeonghoon Kim","orcid":"https://orcid.org/0000-0002-6068-6476"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Jeonghoon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015122589","display_name":"Beomseok Kwon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwon, Beomseok","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090974462","display_name":"Byeongwook Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Byeongwook","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055490416","display_name":"Se Jung Kwon","orcid":"https://orcid.org/0000-0003-3456-9038"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwon, Se Jung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101652725","display_name":"Dongsoo Lee","orcid":"https://orcid.org/0000-0002-4155-6940"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Dongsoo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5056065406"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.8163193464279175},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.7585420608520508},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5971688628196716},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.46746423840522766},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.309700608253479}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.8163193464279175},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.7585420608520508},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5971688628196716},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.46746423840522766},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.309700608253479}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2309.15531","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.15531","pdf_url":"https://arxiv.org/pdf/2309.15531","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2309.15531","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2309.15531","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2309.15531","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.15531","pdf_url":"https://arxiv.org/pdf/2309.15531","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7400000095367432,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387158310.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2390279801","https://openalex.org/W3006513224","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2046456988"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"recently":[5],"demonstrated":[6],"remarkable":[7],"success":[8],"across":[9,178],"various":[10,156,179],"tasks.":[11],"However,":[12],"efficiently":[13],"serving":[14],"LLMs":[15],"has":[16],"been":[17],"a":[18,39,47,65,116,139,148],"challenge":[19,48],"due":[20,49],"to":[21,50,155,187,194],"the":[22,56,81,90,96,100,106,109,162],"large":[23],"memory":[24],"bottleneck,":[25],"specifically":[26],"in":[27,108],"small":[28],"batch":[29],"inference":[30],"settings":[31],"(e.g.":[32],"mobile":[33],"devices).":[34],"Weight-only":[35],"quantization":[36,45,72,127,137,150],"can":[37,112,153],"be":[38],"promising":[40],"approach,":[41],"but":[42],"sub-4":[43],"bit":[44],"remains":[46],"large-magnitude":[51],"activation":[52,93,122],"outliers.":[53],"To":[54],"mitigate":[55],"undesirable":[57],"outlier":[58],"effect,":[59],"we":[60,143],"first":[61],"propose":[62,144],"per-IC":[63,136],"quantization,":[64],"simple":[66],"yet":[67],"effective":[68],"method":[69,86],"that":[70,92,121,152],"creates":[71],"groups":[73],"within":[74,115],"each":[75],"input":[76,97],"channel":[77],"(IC)":[78],"rather":[79],"than":[80],"conventional":[82],"per-output-channel":[83],"(per-OC).":[84],"Our":[85],"is":[87,200],"motivated":[88],"by":[89,166],"observation":[91],"outliers":[94,114,123],"affect":[95],"dimension":[98],"of":[99,164],"weight":[101,131,157],"matrix,":[102],"so":[103],"similarly":[104],"grouping":[105],"weights":[107],"IC":[110],"direction":[111],"isolate":[113],"group.":[117],"We":[118,160],"also":[119,133],"find":[120],"do":[124],"not":[125],"dictate":[126],"difficulty,":[128],"and":[129,173,191],"inherent":[130],"sensitivities":[132],"exist.":[134],"With":[135],"as":[138,171],"new":[140],"outlier-friendly":[141],"scheme,":[142],"Adaptive":[145],"Dimensions":[146],"(AdaDim),":[147],"versatile":[149],"framework":[151],"adapt":[154],"sensitivity":[158],"patterns.":[159],"demonstrate":[161],"effectiveness":[163],"AdaDim":[165],"augmenting":[167],"prior":[168],"methods":[169],"such":[170],"Round-To-Nearest":[172],"GPTQ,":[174],"showing":[175],"significant":[176],"improvements":[177],"language":[180],"modeling":[181],"benchmarks":[182],"for":[183],"both":[184],"base":[185],"(up":[186,193],"+4.7%":[188],"on":[189,196],"MMLU)":[190],"instruction-tuned":[192],"+10%":[195],"HumanEval)":[197],"LLMs.":[198],"Code":[199],"available":[201],"at":[202],"https://github.com/johnheo/adadim-llm":[203]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
