{"id":"https://openalex.org/W4415103068","doi":"https://doi.org/10.1109/iccv51701.2025.02273","title":"ViM-VQ: Efficient Post-Training Vector Quantization for Visual Mamba","display_name":"ViM-VQ: Efficient Post-Training Vector Quantization for Visual Mamba","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4415103068","doi":"https://doi.org/10.1109/iccv51701.2025.02273"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.02273","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02273","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2503.09509","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065813707","display_name":"Juncan Deng","orcid":"https://orcid.org/0000-0003-0860-4442"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Juncan Deng","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019049217","display_name":"Shuaiting Li","orcid":"https://orcid.org/0009-0002-7726-4883"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuaiting Li","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043817623","display_name":"Zeyu Wang","orcid":"https://orcid.org/0000-0003-0985-4478"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeyu Wang","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111345577","display_name":"Kedong Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kedong Xu","raw_affiliation_strings":["vivo Mobile Communication Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"vivo Mobile Communication Co., Ltd","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101434406","display_name":"Hong Gu","orcid":"https://orcid.org/0000-0002-8224-146X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong Gu","raw_affiliation_strings":["vivo Mobile Communication Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"vivo Mobile Communication Co., Ltd","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091073331","display_name":"Kejie Huang","orcid":"https://orcid.org/0000-0003-3722-9979"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kejie Huang","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5065813707"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25598043,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"24518","last_page":"24527"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9506000280380249,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13579","display_name":"Image and Video Stabilization","score":0.9359999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6880999803543091},{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.6491000056266785},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4702000021934509},{"id":"https://openalex.org/keywords/regular-polygon","display_name":"Regular polygon","score":0.46209999918937683},{"id":"https://openalex.org/keywords/linde\u2013buzo\u2013gray-algorithm","display_name":"Linde\u2013Buzo\u2013Gray algorithm","score":0.43720000982284546},{"id":"https://openalex.org/keywords/convex-hull","display_name":"Convex hull","score":0.426800012588501},{"id":"https://openalex.org/keywords/convex-optimization","display_name":"Convex optimization","score":0.4092000126838684},{"id":"https://openalex.org/keywords/convex-combination","display_name":"Convex combination","score":0.3560999929904938}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6880999803543091},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.6491000056266785},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5024999976158142},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4702000021934509},{"id":"https://openalex.org/C112680207","wikidata":"https://www.wikidata.org/wiki/Q714886","display_name":"Regular polygon","level":2,"score":0.46209999918937683},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4494999945163727},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.44600000977516174},{"id":"https://openalex.org/C93372532","wikidata":"https://www.wikidata.org/wiki/Q6552455","display_name":"Linde\u2013Buzo\u2013Gray algorithm","level":3,"score":0.43720000982284546},{"id":"https://openalex.org/C206194317","wikidata":"https://www.wikidata.org/wiki/Q1138624","display_name":"Convex hull","level":3,"score":0.426800012588501},{"id":"https://openalex.org/C157972887","wikidata":"https://www.wikidata.org/wiki/Q463359","display_name":"Convex optimization","level":3,"score":0.4092000126838684},{"id":"https://openalex.org/C111110010","wikidata":"https://www.wikidata.org/wiki/Q2627315","display_name":"Convex combination","level":4,"score":0.3560999929904938},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33820000290870667},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3273000121116638},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3156000077724457},{"id":"https://openalex.org/C40567965","wikidata":"https://www.wikidata.org/wiki/Q1820283","display_name":"Learning vector quantization","level":3,"score":0.31290000677108765},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.31119999289512634},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C13336665","wikidata":"https://www.wikidata.org/wiki/Q125977","display_name":"Vector space","level":2,"score":0.2678000032901764},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.2678000032901764},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.26339998841285706},{"id":"https://openalex.org/C145446738","wikidata":"https://www.wikidata.org/wiki/Q319913","display_name":"Convex function","level":3,"score":0.2531999945640564}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.02273","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02273","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2503.09509","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.09509","pdf_url":"https://arxiv.org/pdf/2503.09509","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2503.09509","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.09509","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2503.09509","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.09509","pdf_url":"https://arxiv.org/pdf/2503.09509","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4474298529","display_name":null,"funder_award_id":"62274142","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Visual":[0],"Mamba":[1],"networks":[2,67],"(ViMs)":[3],"extend":[4],"the":[5,43,106,122,159,163],"selective":[6],"state":[7],"space":[8],"model":[9],"(Mamba)":[10],"to":[11,75,104,166,183],"various":[12,199],"vision":[13],"tasks":[14],"and":[15,32,38,62,68,118,162,171],"demonstrate":[16,189],"significant":[17],"potential.":[18],"As":[19],"a":[20,149],"promising":[21],"compression":[22],"technique,":[23],"vector":[24,136,175],"quantization":[25,58,99,137,176,197],"(VQ)":[26],"decomposes":[27],"network":[28],"weights":[29,88],"into":[30],"codebooks":[31],"assignments,":[33],"significantly":[34,97],"reducing":[35],"memory":[36,113],"usage":[37],"computational":[39],"latency,":[40],"thereby":[41],"enabling":[42],"deployment":[44],"of":[45,89,144],"ViMs":[46,76,93],"on":[47],"edge":[48],"devices.":[49],"Although":[50],"existing":[51],"VQ":[52,108],"methods":[53,74,109],"have":[54],"achieved":[55],"extremely":[56],"low-bit":[57,196],"(e.g.,":[59],"3-bit,":[60],"2-bit,":[61],"1-bit)":[63],"in":[64,78,92,121,195],"convolutional":[65],"neural":[66],"Transformer-based":[69],"networks,":[70],"directly":[71],"applying":[72],"these":[73],"results":[77,188],"unsatisfactory":[79],"accuracy.":[80],"We":[81],"identify":[82],"several":[83],"key":[84],"challenges:":[85],"1)":[86,148],"The":[87],"Mamba-based":[90],"blocks":[91],"contain":[94],"numerous":[95],"outliers,":[96],"amplifying":[98],"errors.":[100,186],"2)":[101,172],"When":[102],"applied":[103],"ViMs,":[105],"latest":[107],"suffer":[110],"from":[111],"excessive":[112],"consumption,":[114],"lengthy":[115],"calibration":[116],"procedures,":[117],"suboptimal":[119],"performance":[120,194],"search":[123,167],"for":[124,140,168],"optimal":[125,169,181],"codewords.":[126],"In":[127],"this":[128],"paper,":[129],"we":[130],"propose":[131],"ViM-VQ,":[132],"an":[133,173],"efficient":[134],"post-training":[135],"method":[138],"tailored":[139],"ViMs.":[141],"ViM-VQ":[142,191],"consists":[143],"two":[145],"innovative":[146],"components:":[147],"fast":[150],"convex":[151,160,164],"combination":[152],"optimization":[153],"algorithm":[154],"that":[155,178,190],"efficiently":[156],"updates":[157],"both":[158],"combinations":[161],"hulls":[165],"codewords,":[170],"incremental":[174],"strategy":[177],"incrementally":[179],"confirms":[180],"codewords":[182],"mitigate":[184],"truncation":[185],"Experimental":[187],"achieves":[192],"state-of-the-art":[193],"across":[198],"visual":[200],"tasks.":[201]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-13T00:00:00"}
