{"id":"https://openalex.org/W7124969355","doi":"https://doi.org/10.1109/cloudcom67567.2025.11331377","title":"QER: Quantized Low-Rank Error Reconstructor for LLM Low-Bitwidth Quantization","display_name":"QER: Quantized Low-Rank Error Reconstructor for LLM Low-Bitwidth Quantization","publication_year":2025,"publication_date":"2025-11-14","ids":{"openalex":"https://openalex.org/W7124969355","doi":"https://doi.org/10.1109/cloudcom67567.2025.11331377"},"language":null,"primary_location":{"id":"doi:10.1109/cloudcom67567.2025.11331377","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloudcom67567.2025.11331377","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 lEEE International Conference on Cloud Computing Technology and Science (CloudCom)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009545714","display_name":"Shoukai Xu","orcid":"https://orcid.org/0000-0003-2734-7373"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shoukai Xu","raw_affiliation_strings":["South China University of Technology,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"South China University of Technology,Guangzhou,China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123428591","display_name":"Runhao Zeng","orcid":null},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runhao Zeng","raw_affiliation_strings":["SHENZHEN MSU-BIT UNIVERSITY,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"SHENZHEN MSU-BIT UNIVERSITY,Shenzhen,China","institution_ids":["https://openalex.org/I4210152380","https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085163597","display_name":"Z. L. Zhang","orcid":"https://orcid.org/0009-0003-8227-7803"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyang Zhang","raw_affiliation_strings":["South China University of Technology,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"South China University of Technology,Guangzhou,China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123429680","display_name":"Hao Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Huang","raw_affiliation_strings":["South China University of Technology,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"South China University of Technology,Guangzhou,China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059386507","display_name":"Qiang Zheng","orcid":"https://orcid.org/0000-0001-9422-7430"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingfang Zheng","raw_affiliation_strings":["PengCheng Laboratory,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"PengCheng Laboratory,Shenzhen,China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055543314","display_name":"Xiangyuan Lan","orcid":"https://orcid.org/0000-0001-8564-0346"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyuan Lan","raw_affiliation_strings":["PengCheng Laboratory,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"PengCheng Laboratory,Shenzhen,China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123412585","display_name":"Yaowei Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaowei Wang","raw_affiliation_strings":["PengCheng Laboratory,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"PengCheng Laboratory,Shenzhen,China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123388742","display_name":"Mingkui Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingkui Tan","raw_affiliation_strings":["South China University of Technology,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"South China University of Technology,Guangzhou,China","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5009545714"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.6731904,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.19439999759197235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.19439999759197235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.1867000013589859,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.0966000035405159,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.7979999780654907},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6514999866485596},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6043999791145325},{"id":"https://openalex.org/keywords/round-off-error","display_name":"Round-off error","score":0.49000000953674316},{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.423799991607666},{"id":"https://openalex.org/keywords/error-detection-and-correction","display_name":"Error detection and correction","score":0.36800000071525574},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.34310001134872437}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.7979999780654907},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6748999953269958},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6514999866485596},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6043999791145325},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5076000094413757},{"id":"https://openalex.org/C61005703","wikidata":"https://www.wikidata.org/wiki/Q2145211","display_name":"Round-off error","level":2,"score":0.49000000953674316},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.423799991607666},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3828999996185303},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.36800000071525574},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.34310001134872437},{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.3310000002384186},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.32190001010894775},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.2750000059604645},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.271699994802475},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2700999975204468},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cloudcom67567.2025.11331377","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloudcom67567.2025.11331377","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 lEEE International Conference on Cloud Computing Technology and Science (CloudCom)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2738634320","display_name":null,"funder_award_id":"62402252,62536003","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W3168126734","https://openalex.org/W3185341429","https://openalex.org/W4402670692","https://openalex.org/W4408182971","https://openalex.org/W4409362928","https://openalex.org/W4409362992","https://openalex.org/W4412945475","https://openalex.org/W4415796640"],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"achieved":[5],"remarkable":[6],"success":[7],"but":[8],"face":[9],"significant":[10],"deployment":[11],"challenges":[12],"in":[13,54,121],"cloud":[14,43],"and":[15,23,37,86,139,162,195,225,245,250],"edge":[16],"environments":[17],"due":[18],"to":[19,33,175,181,210,223,235],"their":[20,60],"massive":[21],"computational":[22],"storage":[24],"requirements.":[25],"Model":[26],"quantization":[27,153,244],"serves":[28],"as":[29],"a":[30,77,106],"key":[31],"solution":[32],"enhance":[34],"the":[35,82,126,133,152,241],"scalability":[36],"efficiency":[38],"of":[39,128,155],"LLMs":[40],"within":[41,186],"distributed":[42],"platforms.":[44],"Existing":[45],"Post-Training":[46],"Quantization":[47],"(PTQ)":[48],"methods":[49],"often":[50],"exhibit":[51],"suboptimal":[52],"performance":[53,202],"low-bit":[55],"settings.":[56],"To":[57,100],"further":[58],"improve":[59],"precision,":[61],"Quantization-Aware":[62],"Training":[63],"(QAT)":[64],"combined":[65],"with":[66,132,212],"Low-Rank":[67],"Adaptation":[68],"(LoRA)":[69],"has":[70],"been":[71],"explored":[72],"for":[73,112],"error":[74,213],"correction.":[75],"However,":[76],"critical":[78],"issue":[79],"is":[80],"that":[81],"quantized":[83,134,159],"base":[84,135],"model":[85],"full-precision":[87],"LoRA":[88],"parameters":[89,131,157],"suffer":[90],"from":[91],"precision":[92],"mismatch,":[93],"introducing":[94],"additional":[95],"errors":[96,148,154,165,171],"during":[97,166],"weight":[98],"merging.":[99,168],"address":[101],"these":[102],"challenges,":[103],"we":[104],"propose":[105],"Quantized":[107],"Low-rank":[108],"Error":[109],"Reconstructor":[110],"(QER)":[111],"LLM":[113,253],"low-bitwidth":[114,122,167],"quantization.":[115],"QER":[116,145,156,176,199,215,239],"first":[117],"enables":[118],"lossless":[119],"merging":[120],"format":[123],"by":[124,219,229],"aligning":[125],"bitwidth":[127],"its":[129],"low-rank":[130,160,246],"parameters,":[136,177],"eliminating":[137],"dequantization":[138],"requantization":[140],"steps.":[141],"Through":[142],"this":[143],"process,":[144],"reconstructs":[146],"original":[147],"into":[149],"two":[150,170],"components:":[151],"(i.e.,":[158],"parameters)":[161],"potential":[163],"overflow":[164],"These":[169],"are":[172],"directly":[173],"related":[174],"making":[178],"them":[179],"easier":[180],"optimize":[182],"via":[183],"gradient-based":[184],"updates":[185],"an":[187],"error-aware":[188],"training":[189,197],"framework.":[190],"Requiring":[191],"only":[192],"128":[193],"samples":[194],"1":[196],"epoch,":[198],"demonstrates":[200],"superior":[201],"on":[203,237],"LLaMA-1/2":[204],"families.":[205],"In":[206],"4-bit":[207],"quantization,":[208],"compared":[209],"QLLM":[211],"correction,":[214],"reduces":[216],"average":[217,227],"perplexity":[218],"13.8%":[220],"(from":[221,233],"10.97":[222],"9.45)":[224],"improves":[226],"accuracy":[228],"3.01":[230],"percentage":[231],"points":[232],"51.84%":[234],"54.85%)":[236],"LLaMA-1-7B.":[238],"bridges":[240],"gap":[242],"between":[243],"adaptation,":[247],"enabling":[248],"efficient":[249],"accurate":[251],"low-precision":[252],"deployment.":[254]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-01-21T00:00:00"}
