{"id":"https://openalex.org/W4412888358","doi":"https://doi.org/10.18653/v1/2025.findings-acl.554","title":"RQT: Hierarchical Residual Quantization for Multi-Model Compression","display_name":"RQT: Hierarchical Residual Quantization for Multi-Model Compression","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412888358","doi":"https://doi.org/10.18653/v1/2025.findings-acl.554"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.554","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.554","pdf_url":"https://aclanthology.org/2025.findings-acl.554.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.554.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Chen Tianqi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen Tianqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079244166","display_name":"Peisong Wang","orcid":"https://orcid.org/0000-0002-6384-0280"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peisong Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062221157","display_name":"Weixiang Xu","orcid":"https://orcid.org/0000-0002-3083-794X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weixiang Xu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101196275","display_name":"Zeyu Zhu","orcid":"https://orcid.org/0009-0001-5405-0295"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeyu Zhu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5042885658","display_name":"Jianwei Cheng","orcid":"https://orcid.org/0000-0003-4915-6295"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jian Cheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14765671,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"10644","last_page":"10660"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9726999998092651,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9726999998092651,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6581787467002869},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5502564311027527},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5319818258285522},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.4138285219669342},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.30600467324256897}],"concepts":[{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6581787467002869},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5502564311027527},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5319818258285522},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4138285219669342},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.30600467324256897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.554","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.554","pdf_url":"https://aclanthology.org/2025.findings-acl.554.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.554","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.554","pdf_url":"https://aclanthology.org/2025.findings-acl.554.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3352541487","display_name":null,"funder_award_id":"BK20243051","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"}],"funders":[{"id":"https://openalex.org/F4320321605","display_name":"Government of Jiangsu Province","ror":"https://ror.org/004svx814"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412888358.pdf","grobid_xml":"https://content.openalex.org/works/W4412888358.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2390279801","https://openalex.org/W2560215812","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W326456911"],"abstract_inverted_index":{"Delta":[0],"compression":[1],"methods":[2],"focus":[3],"on":[4,99,125],"efficiently":[5],"serving":[6],"multiple":[7],"uniquely":[8],"fine-tuned":[9,84,126],"models,":[10],"each":[11,109],"tailored":[12],"to":[13,40,49,120,148],"specific":[14],"tasks":[15],"and":[16,28,103,115,132],"user":[17],"requirements.These":[18],"approaches":[19],"decompose":[20],"a":[21,25,72,89,93],"finetuned":[22],"LLM":[23],"into":[24],"base":[26],"model":[27,54],"corresponding":[29],"delta":[30],"weights,":[31],"which":[32,108],"are":[33],"compressed":[34],"using":[35],"low-rank":[36],"or":[37],"low-bit":[38,79],"representations":[39],"reduce":[41],"storage":[42],"costs.However,":[43],"their":[44],"effectiveness":[45],"is":[46],"highly":[47],"sensitive":[48],"the":[50,53,60,63,67,112],"magnitude":[51],"of":[52,62,96,144,158],"deltas-a":[55],"factor":[56],"directly":[57],"influenced":[58],"by":[59],"scale":[61],"training":[64],"data.We":[65],"propose":[66],"Residual":[68],"Quantization":[69],"Tree":[70],"(RQT),":[71],"hierarchical":[73],"quantization":[74,113],"framework":[75],"that":[76,137],"automatically":[77],"shares":[78],"integer":[80],"weights":[81],"across":[82,128],"similar":[83],"models.The":[85],"RQT":[86,124,138],"construction":[87],"employs":[88],"two-phase":[90],"greedy":[91],"algorithm:":[92],"bottom-up":[94],"aggregation":[95],"models":[97,127],"based":[98],"weight":[100],"matrix":[101],"similarity":[102],"top-down":[104],"residual":[105,118],"quantization,":[106],"in":[107],"node":[110],"optimizes":[111],"parameters":[114],"then":[116],"delegates":[117],"errors":[119],"child":[121],"nodes.We":[122],"evaluate":[123],"mathematics,":[129],"coding,":[130],"chatbot,":[131],"Chinese":[133],"LLMs.The":[134],"results":[135],"show":[136],"achieves":[139],"an":[140,155],"average":[141],"accuracy":[142],"degradation":[143],"approximately":[145],"3%":[146],"(comparable":[147],"previous":[149],"4-bit":[150],"post-training":[151],"quantization)":[152],"while":[153],"maintaining":[154],"effective":[156],"bitwidth":[157],"around":[159],"2":[160],"bits.":[161]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
