{"id":"https://openalex.org/W4405434040","doi":"https://doi.org/10.1145/3669940.3707268","title":"MVQ: Towards Efficient DNN Compression and Acceleration with Masked Vector Quantization","display_name":"MVQ: Towards Efficient DNN Compression and Acceleration with Masked Vector Quantization","publication_year":2025,"publication_date":"2025-02-06","ids":{"openalex":"https://openalex.org/W4405434040","doi":"https://doi.org/10.1145/3669940.3707268"},"language":"en","primary_location":{"id":"doi:10.1145/3669940.3707268","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3669940.3707268","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.10261","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019049217","display_name":"Shuaiting Li","orcid":"https://orcid.org/0009-0002-7726-4883"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shuaiting Li","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103225929","display_name":"Chengxuan Wang","orcid":"https://orcid.org/0009-0002-6677-2145"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengxuan Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065813707","display_name":"Juncan Deng","orcid":"https://orcid.org/0000-0003-0860-4442"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Juncan Deng","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043817623","display_name":"Zeyu Wang","orcid":"https://orcid.org/0000-0003-0985-4478"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeyu Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022561121","display_name":"Zewen Ye","orcid":"https://orcid.org/0000-0003-3623-3554"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zewen Ye","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006022584","display_name":"Zongsheng Wang","orcid":"https://orcid.org/0000-0003-4240-4284"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongsheng Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029202186","display_name":"Haibin Shen","orcid":"https://orcid.org/0000-0002-5431-609X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haibin Shen","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091073331","display_name":"Kejie Huang","orcid":"https://orcid.org/0000-0003-3722-9979"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kejie Huang","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5019049217"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":4.8058,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.93994095,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"731","last_page":"745"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.7457999587059021},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.6876412630081177},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.642940104007721},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5764116048812866},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5010292530059814},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.47756272554397583},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36707085371017456},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35553449392318726},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.26264339685440063},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11321035027503967}],"concepts":[{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.7457999587059021},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.6876412630081177},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.642940104007721},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5764116048812866},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5010292530059814},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.47756272554397583},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36707085371017456},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35553449392318726},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26264339685440063},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11321035027503967},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3669940.3707268","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3669940.3707268","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2412.10261","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.10261","pdf_url":"https://arxiv.org/pdf/2412.10261","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.10261","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.10261","pdf_url":"https://arxiv.org/pdf/2412.10261","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2981938667","display_name":null,"funder_award_id":"Shenzhen","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4474298529","display_name":null,"funder_award_id":"62274142","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6848268994","display_name":null,"funder_award_id":"62274142","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4405434040.pdf","grobid_xml":"https://content.openalex.org/works/W4405434040.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W2124509324","https://openalex.org/W2233116163","https://openalex.org/W2442974303","https://openalex.org/W2588191434","https://openalex.org/W2606722458","https://openalex.org/W2894581376","https://openalex.org/W2942998388","https://openalex.org/W2946485102","https://openalex.org/W2965191108","https://openalex.org/W2965862774","https://openalex.org/W2971734772","https://openalex.org/W2979439447","https://openalex.org/W2979838629","https://openalex.org/W3012178976","https://openalex.org/W3096344818","https://openalex.org/W3127330041","https://openalex.org/W3135461891","https://openalex.org/W3185702163","https://openalex.org/W3198153656","https://openalex.org/W3211907200","https://openalex.org/W3213528054","https://openalex.org/W4224612674","https://openalex.org/W4360831844","https://openalex.org/W4383604976","https://openalex.org/W6639102338","https://openalex.org/W6658049321","https://openalex.org/W6678775411","https://openalex.org/W6854415014"],"related_works":["https://openalex.org/W1539956819","https://openalex.org/W2565094479","https://openalex.org/W2390829436","https://openalex.org/W2093152993","https://openalex.org/W3209251257","https://openalex.org/W2094714038","https://openalex.org/W2144404403","https://openalex.org/W2106967209","https://openalex.org/W2612632602","https://openalex.org/W2321805087"],"abstract_inverted_index":{"Vector":[0],"quantization(VQ)":[1],"is":[2,46],"a":[3,41,56,131],"hardware-friendly":[4],"DNN":[5],"compression":[6],"method":[7],"that":[8],"can":[9],"reduce":[10],"the":[11,30,62,68,78,83,89,96,100,110,113,138],"storage":[12],"cost":[13],"and":[14,75,86,99,129],"weight-loading":[15],"datawidth":[16],"of":[17,59],"hardware":[18],"accelerators.":[19],"However,":[20],"conventional":[21],"VQ":[22],"techniques":[23],"lead":[24],"to":[25,108,136],"significant":[26],"accuracy":[27],"loss":[28],"because":[29],"important":[31,53,70],"weights":[32,54,71,85,98],"are":[33,102,105],"not":[34],"well":[35],"preserved.":[36],"To":[37],"tackle":[38],"this":[39],"problem,":[40],"novel":[42],"approach":[43,66],"called":[44],"MVQ":[45],"proposed,":[47],"which":[48,104],"aims":[49],"at":[50],"better":[51],"approximating":[52],"with":[55],"limited":[57],"number":[58],"codewords.":[60,111],"At":[61,112],"algorithm":[63],"level,":[64,115],"our":[65,116],"removes":[67],"less":[69],"through":[72],"N:M":[73],"pruning":[74],"then":[76,106],"minimizes":[77],"vector":[79,119,143],"clustering":[80],"error":[81],"between":[82,95],"remaining":[84],"codewords":[87,101],"by":[88,141],"masked":[90,142],"k-means":[91],"algorithm.":[92],"Only":[93],"distances":[94],"unpruned":[97],"computed,":[103],"used":[107],"update":[109],"architecture":[114],"accelerator":[117,128],"implements":[118],"quantization":[120],"on":[121],"an":[122],"EWS":[123],"(Enhanced":[124],"weight":[125],"stationary)":[126],"CNN":[127],"proposes":[130],"sparse":[132],"systolic":[133],"array":[134],"design":[135],"maximize":[137],"benefits":[139],"brought":[140],"quantization.":[144]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2024-12-17T00:00:00"}
