{"id":"https://openalex.org/W7164195775","doi":"https://doi.org/10.1109/fccm68464.2026.00025","title":"ViM-Q: Scalable Algorithm-Hardware Co-Design for Vision Mamba Model Inference on FPGA","display_name":"ViM-Q: Scalable Algorithm-Hardware Co-Design for Vision Mamba Model Inference on FPGA","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W7164195775","doi":"https://doi.org/10.1109/fccm68464.2026.00025"},"language":null,"primary_location":{"id":"doi:10.1109/fccm68464.2026.00025","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fccm68464.2026.00025","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108948085","display_name":"Shengzhe Lyu","orcid":"https://orcid.org/0009-0004-7331-7700"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Shengzhe Lyu","raw_affiliation_strings":["City University of Hong Kong,Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"City University of Hong Kong,Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006271768","display_name":"Yuhan She","orcid":"https://orcid.org/0000-0003-3748-577X"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yuhan She","raw_affiliation_strings":["City University of Hong Kong,Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"City University of Hong Kong,Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119939040","display_name":"Patrick S. Y. Hung","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Patrick S. Y. Hung","raw_affiliation_strings":["City University of Hong Kong,Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"City University of Hong Kong,Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138332174","display_name":"Ray C. C. Cheung","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ray C. C. Cheung","raw_affiliation_strings":["City University of Hong Kong,Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"City University of Hong Kong,Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036424879","display_name":"Weitao Xu","orcid":"https://orcid.org/0000-0001-9741-5912"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Weitao Xu","raw_affiliation_strings":["City University of Hong Kong,Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"City University of Hong Kong,Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I168719708"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.79647049,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"90","last_page":"99"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.2870999872684479,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.2870999872684479,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.2851000130176544,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.10239999741315842,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5167999863624573},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5037000179290771},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.45739999413490295},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.3239000141620636},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.29980000853538513}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6783000230789185},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5167999863624573},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5037000179290771},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.45739999413490295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37529999017715454},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3528999984264374},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.35249999165534973},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3239000141620636},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3203999996185303},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.29580000042915344},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.26019999384880066}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fccm68464.2026.00025","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fccm68464.2026.00025","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322170","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W2565125333","https://openalex.org/W3138516171","https://openalex.org/W3158020960","https://openalex.org/W3180355996","https://openalex.org/W4206706211","https://openalex.org/W4213019189","https://openalex.org/W4285601751","https://openalex.org/W4313156423","https://openalex.org/W4385245566","https://openalex.org/W4389162698","https://openalex.org/W4400524700","https://openalex.org/W4408891426","https://openalex.org/W4409248406","https://openalex.org/W4409248495","https://openalex.org/W4409263053","https://openalex.org/W4409282403","https://openalex.org/W4410553236","https://openalex.org/W4410810557","https://openalex.org/W4410915127","https://openalex.org/W4411486388","https://openalex.org/W4413146579","https://openalex.org/W4413147024","https://openalex.org/W4413147759","https://openalex.org/W4413278525","https://openalex.org/W4413755548","https://openalex.org/W4414404693","https://openalex.org/W4415524158","https://openalex.org/W4415796023","https://openalex.org/W4416429538","https://openalex.org/W7106105931","https://openalex.org/W7133198810","https://openalex.org/W7133231030","https://openalex.org/W7160103572","https://openalex.org/W7160217808"],"related_works":[],"abstract_inverted_index":{"Vision":[0],"Mamba":[1],"(ViM)":[2],"models":[3,120,214],"offer":[4],"a":[5,81,94,110,124,129,133,144,193,208],"compelling":[6],"efficiency":[7,190],"advantage":[8],"over":[9,192],"Transformers":[10],"by":[11,72],"leveraging":[12],"the":[13,47,68,90,151,159,172],"linear":[14,130],"complexity":[15],"of":[16],"State":[17],"Space":[18],"Models":[19],"(SSMs),":[20],"yet":[21],"efficiently":[22],"deploying":[23,212],"them":[24],"on":[25,59,89,123,176,203,215],"FPGAs":[26],"remains":[27],"challenging.":[28],"Linear":[29],"layers":[30],"struggle":[31],"with":[32,67,140],"dynamic":[33,99],"activation":[34,101],"outliers":[35],"that":[36,149],"render":[37],"static":[38],"quantization":[39,43,96,102],"ineffective,":[40],"while":[41,54,154],"uniform":[42],"fails":[44],"to":[45,106,137,165],"capture":[46],"weight":[48,117],"distribution":[49],"at":[50],"low":[51],"bit-widths.":[52],"Furthermore,":[53],"associative":[55],"scan":[56],"accelerates":[57],"SSMs":[58],"GPUs,":[60],"its":[61],"memory":[62],"access":[63],"patterns":[64],"are":[65,121],"misaligned":[66],"streaming":[69],"dataflow":[70],"required":[71],"FPGAs.":[73],"To":[74],"address":[75],"these":[76],"challenges,":[77],"we":[78],"present":[79],"ViM-Q1,":[80],"scalable":[82],"algorithm-hardware":[83],"co-design":[84,206],"for":[85,200,211],"end-to-end":[86],"ViM":[87,173,213],"inference":[88,202],"edge.":[91],"We":[92],"introduce":[93],"hardware-aware":[95],"scheme":[97],"combining":[98],"per-token":[100],"and":[103,143,168,187],"per-channel":[104],"smoothing":[105],"mitigate":[107],"outliers,":[108],"alongside":[109],"custom":[111],"4-bit":[112],"per-block":[113],"Additive":[114],"Power-of-Two":[115],"(APoT)":[116],"quantization.":[118],"The":[119],"deployed":[122],"runtime-parameterizable":[125],"FPGA":[126],"accelerator":[127],"featuring":[128],"engine":[131,148],"employing":[132],"Lookup-Table":[134],"(LUT)":[135],"unit":[136],"replace":[138],"multiplications":[139],"shift-add":[141],"operations,":[142],"fine-grained":[145],"pipelined":[146],"SSM":[147],"parallelizes":[150],"state":[152],"dimension":[153],"preserving":[155],"sequential":[156],"recurrence.":[157],"Crucially,":[158],"hardware":[160],"supports":[161],"runtime":[162],"configuration,":[163],"adapting":[164],"diverse":[166],"dimensions":[167],"input":[169],"resolutions":[170],"across":[171],"family.":[174],"Implemented":[175],"an":[177,183],"AMD":[178],"ZCU102":[179],"FPGA,":[180],"ViM-Q":[181],"achieves":[182],"average":[184],"4.96\u00d7":[185],"speedup":[186],"59.8\u00d7":[188],"energy":[189],"gain":[191],"quantized":[194],"NVIDIA":[195],"RTX":[196],"3090":[197],"GPU":[198],"baseline":[199],"low-batch":[201],"ViM-tiny.":[204],"This":[205],"shows":[207],"viable":[209],"path":[210],"resource-constrained":[216],"edge":[217],"devices.":[218]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2026-06-11T00:00:00"}
