{"id":"https://openalex.org/W7164126783","doi":"https://doi.org/10.1109/fccm68464.2026.00026","title":"ReCoVLM: A Reconfigurable FPGA\u2013GPU Co-Design for Edge Vision-Language Inference","display_name":"ReCoVLM: A Reconfigurable FPGA\u2013GPU Co-Design for Edge Vision-Language Inference","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W7164126783","doi":"https://doi.org/10.1109/fccm68464.2026.00026"},"language":null,"primary_location":{"id":"doi:10.1109/fccm68464.2026.00026","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fccm68464.2026.00026","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108047889","display_name":"Yi-Xiang Wang","orcid":"https://orcid.org/0000-0001-5697-0717"},"institutions":[{"id":"https://openalex.org/I198091727","display_name":"Tiangong University","ror":"https://ror.org/00xsr9m91","country_code":"CN","type":"education","lineage":["https://openalex.org/I198091727"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingyu Wang","raw_affiliation_strings":["Tiangong University,Tianjin,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tiangong University,Tianjin,China","institution_ids":["https://openalex.org/I198091727"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138382834","display_name":"Yongjiang Xue","orcid":null},"institutions":[{"id":"https://openalex.org/I198091727","display_name":"Tiangong University","ror":"https://ror.org/00xsr9m91","country_code":"CN","type":"education","lineage":["https://openalex.org/I198091727"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongjiang Xue","raw_affiliation_strings":["Tiangong University,Tianjin,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tiangong University,Tianjin,China","institution_ids":["https://openalex.org/I198091727"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138354538","display_name":"Kailai Zhuang","orcid":null},"institutions":[{"id":"https://openalex.org/I198091727","display_name":"Tiangong University","ror":"https://ror.org/00xsr9m91","country_code":"CN","type":"education","lineage":["https://openalex.org/I198091727"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kailai Zhuang","raw_affiliation_strings":["Tiangong University,Tianjin,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tiangong University,Tianjin,China","institution_ids":["https://openalex.org/I198091727"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138333977","display_name":"Mingze Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I198091727","display_name":"Tiangong University","ror":"https://ror.org/00xsr9m91","country_code":"CN","type":"education","lineage":["https://openalex.org/I198091727"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingze Sun","raw_affiliation_strings":["Tiangong University,Tianjin,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tiangong University,Tianjin,China","institution_ids":["https://openalex.org/I198091727"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5138295444","display_name":"Qingzeng Song","orcid":null},"institutions":[{"id":"https://openalex.org/I198091727","display_name":"Tiangong University","ror":"https://ror.org/00xsr9m91","country_code":"CN","type":"education","lineage":["https://openalex.org/I198091727"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingzeng Song","raw_affiliation_strings":["Tiangong University,Tianjin,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tiangong University,Tianjin,China","institution_ids":["https://openalex.org/I198091727"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I198091727"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.85054428,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"100","last_page":"108"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.3781000077724457,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.3781000077724457,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.33320000767707825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.04600000008940697,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.41350001096725464},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.3952000141143799},{"id":"https://openalex.org/keywords/edge-detection","display_name":"Edge detection","score":0.2856000065803528},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.2596000134944916},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.25929999351501465}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6013000011444092},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.42100000381469727},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.41350001096725464},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3952000141143799},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38429999351501465},{"id":"https://openalex.org/C193536780","wikidata":"https://www.wikidata.org/wiki/Q1513153","display_name":"Edge detection","level":4,"score":0.2856000065803528},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.271699994802475},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2680000066757202},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2596000134944916},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.25929999351501465},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fccm68464.2026.00026","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fccm68464.2026.00026","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2560730294","https://openalex.org/W2963518342","https://openalex.org/W2979382951","https://openalex.org/W4385187240","https://openalex.org/W4393578753","https://openalex.org/W4393949386","https://openalex.org/W4403081466","https://openalex.org/W4403939369","https://openalex.org/W4404582879","https://openalex.org/W4407341646","https://openalex.org/W4408182386","https://openalex.org/W4409132079","https://openalex.org/W4409262075","https://openalex.org/W4410810432","https://openalex.org/W4411232024","https://openalex.org/W4411447392","https://openalex.org/W4413144996","https://openalex.org/W4413156814","https://openalex.org/W4416429575","https://openalex.org/W7124471359","https://openalex.org/W7133193597","https://openalex.org/W7133227460"],"related_works":[],"abstract_inverted_index":{"Vision-Language":[0],"Models":[1],"(VLMs)":[2],"demonstrate":[3,251],"remarkable":[4],"capabilities":[5],"in":[6,75,130,272],"open-world":[7],"understanding":[8],"and":[9,24,40,53,77,109,148,162,189,209,221,246],"interaction.":[10],"However,":[11],"edge":[12,61,96,136,146],"deployment":[13,193],"remains":[14],"challenging":[15],"due":[16],"to":[17,71,113,123,155,213,231,260],"strict":[18],"constraints":[19],"on":[20,236],"power,":[21],"latency,":[22],"reliability,":[23],"privacy":[25],"requirements.":[26],"The":[27],"VLM":[28,97],"inference":[29,254],"pipeline":[30],"typically":[31],"consists":[32],"of":[33,166,172,198,204,256],"three":[34],"stages:":[35],"visual":[36,107,167],"encoding,":[37],"cross-modal":[38,110],"prefill,":[39],"autoregressive":[41,120],"decoding.":[42],"These":[43],"stages":[44,112],"exhibit":[45],"distinct":[46],"characteristics":[47],"regarding":[48],"operator":[49,207],"types,":[50],"parallel":[51],"granularity,":[52],"memory":[54],"access":[55],"patterns.":[56],"Consequently,":[57],"a":[58,88,141,156,181,237,241,247,269],"single":[59],"homogeneous":[60],"device":[62],"(e.g.,":[63],"CPU,":[64],"GPU,":[65,115],"NPU,":[66],"or":[67],"FPGA)":[68],"often":[69],"fails":[70],"achieve":[72],"simultaneous":[73],"optimality":[74],"performance":[76],"energy":[78,132,273],"efficiency":[79],"across":[80],"the":[81,105,114,118,124,152,160,173,177,226],"entire":[82],"pipeline.To":[83],"address":[84],"this,":[85],"we":[86,103,139,169,179],"propose":[87,190],"configurable":[89],"FPGA\u2013GPU":[90],"heterogeneous":[91,238],"collaborative":[92],"system":[93,267],"tailored":[94],"for":[95,145,195],"inference.":[98],"Leveraging":[99],"specific":[100],"hardware":[101,161],"strengths,":[102],"map":[104],"compute-intensive":[106],"encoding":[108],"Prefill":[111],"while":[116],"offloading":[117],"bandwidth-sensitive":[119],"decode":[121],"stage":[122],"FPGA.":[125],"This":[126],"workload-aware":[127],"mapping":[128],"results":[129,235],"improved":[131],"efficiency.":[133,274],"To":[134],"mitigate":[135],"resource":[137],"constraints,":[138],"designed":[140],"model":[142,153],"compression":[143],"method":[144,194],"GPUs":[147],"FPGAs.":[149],"By":[150],"compressing":[151],"parameters":[154],"range":[157],"acceptable":[158],"by":[159,218],"using":[163],"only":[164],"5%":[165],"tokens,":[168],"achieved":[170],"88.9%":[171],"baseline":[174],"performance.":[175],"On":[176],"FPGA,":[178],"implement":[180],"pipelined":[182],"decoder":[183],"that":[184],"efficiently":[185],"utilizes":[186],"DDR":[187],"bandwidth":[188,215,228],"an":[191,252,261],"efficient":[192],"MoE":[196],"(Mixture":[197],"Experts).":[199],"Including":[200],"aligned":[201],"block":[202],"layout":[203],"expert":[205],"weights,":[206],"fusion,":[208],"on-chip":[210],"caching":[211],"strategies":[212],"reduce":[214],"fluctuations":[216],"caused":[217],"weight":[219],"switching":[220],"improve":[222],"throughput":[223,255],"stability,":[224],"enables":[225],"effective":[227],"utilization":[229],"rate":[230],"reach":[232],"87.2%.":[233],"Experimental":[234],"platform":[239],"comprising":[240],"Jetson":[242],"Orin":[243],"Nano":[244],"(8GB)":[245],"Xilinx":[248],"VU9P":[249],"FPGA":[250],"end-to-end":[253],"18.1":[257],"tokens/s.":[258],"Compared":[259],"NVIDIA":[262],"RTX":[263],"4090":[264],"baseline,":[265],"our":[266],"achieves":[268],"2.67\u00d7":[270],"improvement":[271]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2026-06-11T00:00:00"}
