{"id":"https://openalex.org/W4402897324","doi":"https://doi.org/10.1109/jcc62314.2024.00017","title":"FP4-Quantization: Lossless 4bit Quantization for Large Language Models","display_name":"FP4-Quantization: Lossless 4bit Quantization for Large Language Models","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4402897324","doi":"https://doi.org/10.1109/jcc62314.2024.00017"},"language":"en","primary_location":{"id":"doi:10.1109/jcc62314.2024.00017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jcc62314.2024.00017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Joint Cloud Computing (JCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115694812","display_name":"Jie Wang","orcid":"https://orcid.org/0000-0002-3879-0971"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jie Wang","raw_affiliation_strings":["National University of Defense Technology,College of Computer,Changsha,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer,Changsha,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012583779","display_name":"Huanxi Liu","orcid":"https://orcid.org/0000-0002-4284-4012"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huanxi Liu","raw_affiliation_strings":["National University of Defense Technology,College of Computer,Changsha,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer,Changsha,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039795290","display_name":"Dawei Feng","orcid":"https://orcid.org/0000-0002-7587-8905"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Feng","raw_affiliation_strings":["National University of Defense Technology,College of Computer,Changsha,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer,Changsha,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069838077","display_name":"Jie Ding","orcid":"https://orcid.org/0000-0002-3584-6140"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jie Ding","raw_affiliation_strings":["Iflytek,R&#x0026;D Group,Hefei,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Iflytek,R&#x0026;D Group,Hefei,China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088885490","display_name":"Bo Ding","orcid":"https://orcid.org/0000-0002-1236-8318"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Ding","raw_affiliation_strings":["National University of Defense Technology,College of Computer,Changsha,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer,Changsha,China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5115694812"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":1.2588,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.83437173,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"61","last_page":"67"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4406000077724457,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4406000077724457,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.3806999921798706,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.37630000710487366,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.8056260347366333},{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.7076582908630371},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6688409447669983},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2982442378997803},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.241744726896286}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.8056260347366333},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.7076582908630371},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6688409447669983},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2982442378997803},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.241744726896286}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jcc62314.2024.00017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jcc62314.2024.00017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Joint Cloud Computing (JCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.41999998688697815}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1632114991","https://openalex.org/W2890894339","https://openalex.org/W2963015836","https://openalex.org/W2964003909","https://openalex.org/W2970062726","https://openalex.org/W2996908057","https://openalex.org/W2998617917","https://openalex.org/W4212774754","https://openalex.org/W4292119927","https://openalex.org/W4292779060","https://openalex.org/W4298422451","https://openalex.org/W4389524393","https://openalex.org/W6757817989","https://openalex.org/W6778883912","https://openalex.org/W6782879696","https://openalex.org/W6842258392"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W3106969033","https://openalex.org/W2186939576","https://openalex.org/W2357988910","https://openalex.org/W2948148442","https://openalex.org/W2377158164","https://openalex.org/W2187600494","https://openalex.org/W2114030128","https://openalex.org/W2135707701"],"abstract_inverted_index":{"Large":[0],"language":[1],"models(LLMs)":[2],"have":[3],"demonstrated":[4],"exceptional":[5],"performance":[6],"across":[7],"a":[8,32,54,65,74,91],"wide":[9],"range":[10],"of":[11,41,47,86,99,110,133],"tasks.":[12],"However,":[13],"their":[14,21],"extensive":[15],"computational":[16],"and":[17],"storage":[18],"requirements":[19],"hinder":[20],"widespread":[22],"deployment.":[23],"To":[24],"address":[25],"this,":[26],"low-bit":[27],"quantization":[28,49,69,104,128],"has":[29],"emerged":[30],"as":[31],"highly":[33],"effective":[34],"approach":[35],"to":[36,102],"reducing":[37],"the":[38,44,82,108],"inference":[39],"cost":[40],"LLMs.":[42,87],"Nevertheless,":[43],"existing":[45],"repertoire":[46],"4-bit":[48,67,75,112,126],"techniques":[50],"is":[51],"plagued":[52],"by":[53],"substantial":[55],"decline":[56],"in":[57],"model":[58],"precision.":[59],"In":[60],"this":[61],"paper,":[62],"we":[63],"introduce":[64],"novel":[66],"weight":[68,83,127],"method,":[70],"FP4-Quantization,":[71],"which":[72],"leverages":[73],"floating-point(FP4)":[76],"representation":[77],"that":[78,123],"aligns":[79],"better":[80],"with":[81,129],"distribution":[84],"characteristics":[85],"Furthermore,":[88],"it":[89],"incorporates":[90],"Low-Rank":[92],"Quantization":[93],"Error":[94],"Correction(LREC),":[95],"involving":[96],"progressive":[97],"fine-tuning":[98],"low-rank":[100],"parameters":[101],"rectify":[103],"errors,":[105],"thereby":[106],"enabling":[107],"achievement":[109],"precision-preserving":[111],"weight-only":[113],"quantization.":[114],"Our":[115],"Experimental":[116],"results":[117],"on":[118],"multiple":[119],"zero-shot":[120],"tasks":[121],"demonstrate":[122],"FP4-Quantization":[124],"achieves":[125],"an":[130],"accuracy":[131],"degradation":[132],"less":[134],"than":[135],"0.5%.":[136]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-05-23T08:51:43.019350","created_date":"2025-10-10T00:00:00"}
