{"id":"https://openalex.org/W7160293321","doi":"https://doi.org/10.48550/arxiv.2605.01935","title":"ViM-Q: Scalable Algorithm-Hardware Co-Design for Vision Mamba Model Inference on FPGA","display_name":"ViM-Q: Scalable Algorithm-Hardware Co-Design for Vision Mamba Model Inference on FPGA","publication_year":2026,"publication_date":"2026-05-03","ids":{"openalex":"https://openalex.org/W7160293321","doi":"https://doi.org/10.48550/arxiv.2605.01935"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.01935","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01935","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.01935","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108948085","display_name":"Shengzhe Lyu","orcid":"https://orcid.org/0009-0004-7331-7700"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lyu, Shengzhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006271768","display_name":"Yuhan She","orcid":"https://orcid.org/0000-0003-3748-577X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"She, Yuhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119939040","display_name":"Patrick S. Y. Hung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hung, Patrick S. Y.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135299225","display_name":"Ray C. C. Cheung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheung, Ray C. C.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5036424879","display_name":"Weitao Xu","orcid":"https://orcid.org/0000-0001-9741-5912"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Weitao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.3564000129699707,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.3564000129699707,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.1340000033378601,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.09730000048875809,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6725000143051147},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6238999962806702},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.588699996471405},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5827000141143799},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5081999897956848},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.4860999882221222},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.42980000376701355},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.34869998693466187}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7857999801635742},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6725000143051147},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6238999962806702},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.588699996471405},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5827000141143799},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5081999897956848},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.4860999882221222},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4650000035762787},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.42980000376701355},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.36660000681877136},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3621000051498413},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.34869998693466187},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.3440000116825104},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.3336000144481659},{"id":"https://openalex.org/C77390884","wikidata":"https://www.wikidata.org/wiki/Q217302","display_name":"Application-specific integrated circuit","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3086000084877014},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.2985000014305115},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2980000078678131},{"id":"https://openalex.org/C159379195","wikidata":"https://www.wikidata.org/wiki/Q7239568","display_name":"Precomputation","level":3,"score":0.2971000075340271},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.28209999203681946},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.271699994802475},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.01935","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01935","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.01935","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01935","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.9000688195228577,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision":[0],"Mamba":[1],"(ViM)":[2],"models":[3,120,214],"offer":[4],"a":[5,81,94,110,124,129,133,144,193,208],"compelling":[6],"efficiency":[7,190],"advantage":[8],"over":[9,192],"Transformers":[10],"by":[11,72],"leveraging":[12],"the":[13,47,68,90,151,159,172],"linear":[14,130],"complexity":[15],"of":[16],"State":[17],"Space":[18],"Models":[19],"(SSMs),":[20],"yet":[21],"efficiently":[22],"deploying":[23,212],"them":[24],"on":[25,59,89,123,176,203,215],"FPGAs":[26],"remains":[27],"challenging.":[28],"Linear":[29],"layers":[30],"struggle":[31],"with":[32,67,140],"dynamic":[33,99],"activation":[34,101],"outliers":[35],"that":[36,149],"render":[37],"static":[38],"quantization":[39,43,96,102],"ineffective,":[40],"while":[41,54,154],"uniform":[42],"fails":[44],"to":[45,106,137,165],"capture":[46],"weight":[48,117],"distribution":[49],"at":[50],"low":[51],"bit-widths.":[52],"Furthermore,":[53],"associative":[55],"scan":[56],"accelerates":[57],"SSMs":[58],"GPUs,":[60],"its":[61],"memory":[62],"access":[63],"patterns":[64],"are":[65,121],"misaligned":[66],"streaming":[69],"dataflow":[70],"required":[71],"FPGAs.":[73],"To":[74],"address":[75],"these":[76],"challenges,":[77],"we":[78],"present":[79],"ViM-Q,":[80],"scalable":[82],"algorithm-hardware":[83],"co-design":[84,206],"for":[85,200,211],"end-to-end":[86],"ViM":[87,173,213],"inference":[88,202],"edge.":[91],"We":[92],"introduce":[93],"hardware-aware":[95],"scheme":[97],"combining":[98],"per-token":[100],"and":[103,143,168,187],"per-channel":[104],"smoothing":[105],"mitigate":[107],"outliers,":[108],"alongside":[109],"custom":[111],"4-bit":[112],"per-block":[113],"Additive":[114],"Power-of-Two":[115],"(APoT)":[116],"quantization.":[118],"The":[119],"deployed":[122],"runtime-parameterizable":[125],"FPGA":[126],"accelerator":[127],"featuring":[128],"engine":[131,148],"employing":[132],"Lookup-Table":[134],"(LUT)":[135],"unit":[136],"replace":[138],"multiplications":[139],"shift-add":[141],"operations,":[142],"fine-grained":[145],"pipelined":[146],"SSM":[147],"parallelizes":[150],"state":[152],"dimension":[153],"preserving":[155],"sequential":[156],"recurrence.":[157],"Crucially,":[158],"hardware":[160],"supports":[161],"runtime":[162],"configuration,":[163],"adapting":[164],"diverse":[166],"dimensions":[167],"input":[169],"resolutions":[170],"across":[171],"family.":[174],"Implemented":[175],"an":[177,183],"AMD":[178],"ZCU102":[179],"FPGA,":[180],"ViM-Q":[181],"achieves":[182],"average":[184],"4.96x":[185],"speedup":[186],"59.8x":[188],"energy":[189],"gain":[191],"quantized":[194],"NVIDIA":[195],"RTX":[196],"3090":[197],"GPU":[198],"baseline":[199],"low-batch":[201],"ViM-tiny.":[204],"This":[205],"shows":[207],"viable":[209],"path":[210],"resource-constrained":[216],"edge":[217],"devices.":[218]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-06T00:00:00"}
