{"id":"https://openalex.org/W4410553409","doi":"https://doi.org/10.23919/date64628.2025.10993087","title":"Pushing up to the Limit of Memory Bandwidth and Capacity Utilization for Efficient LLM Decoding on Embedded FPGA","display_name":"Pushing up to the Limit of Memory Bandwidth and Capacity Utilization for Efficient LLM Decoding on Embedded FPGA","publication_year":2025,"publication_date":"2025-03-31","ids":{"openalex":"https://openalex.org/W4410553409","doi":"https://doi.org/10.23919/date64628.2025.10993087"},"language":"en","primary_location":{"id":"doi:10.23919/date64628.2025.10993087","is_oa":false,"landing_page_url":"https://doi.org/10.23919/date64628.2025.10993087","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042031737","display_name":"Jindong Li","orcid":"https://orcid.org/0000-0002-4009-916X"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jindong Li","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119309865","display_name":"Tenglong Li","orcid":"https://orcid.org/0009-0007-3266-2075"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tenglong Li","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081220735","display_name":"Guobin Shen","orcid":"https://orcid.org/0000-0002-4069-2107"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guobin Shen","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091698766","display_name":"Dongcheng Zhao","orcid":"https://orcid.org/0000-0002-0593-8650"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongcheng Zhao","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119309918","display_name":"Qian Zhang","orcid":"https://orcid.org/0000-0001-5314-4233"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qian Zhang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025846619","display_name":"Yi Zeng","orcid":"https://orcid.org/0000-0002-9595-9091"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Zeng","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences,Brain-inspired Cognitive Intelligence Lab","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5042031737"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879"],"apc_list":null,"apc_paid":null,"fwci":17.1095,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.99228574,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.8971999883651733,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.8971999883651733,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.8880000114440918,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11693","display_name":"Cryptography and Residue Arithmetic","score":0.8842999935150146,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.7748237252235413},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.7219237685203552},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.6459934115409851},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.5868362188339233},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5692800879478455},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.29688534140586853},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.2537553310394287},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14778035879135132}],"concepts":[{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.7748237252235413},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7219237685203552},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.6459934115409851},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.5868362188339233},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5692800879478455},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.29688534140586853},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.2537553310394287},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14778035879135132},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/date64628.2025.10993087","is_oa":false,"landing_page_url":"https://doi.org/10.23919/date64628.2025.10993087","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.8100000023841858,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G2927787986","display_name":null,"funder_award_id":"E411230101","funder_id":"https://openalex.org/F4320335639","funder_display_name":"Institute of Automation, Chinese Academy of Sciences"}],"funders":[{"id":"https://openalex.org/F4320335639","display_name":"Institute of Automation, Chinese Academy of Sciences","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2616014673","https://openalex.org/W2962834855","https://openalex.org/W3184454880","https://openalex.org/W4205531259","https://openalex.org/W4308083513","https://openalex.org/W4321637298","https://openalex.org/W4360831786","https://openalex.org/W4379471878","https://openalex.org/W4383371242","https://openalex.org/W4388979610","https://openalex.org/W4393140379","https://openalex.org/W4393578753","https://openalex.org/W4393949386","https://openalex.org/W4395704222","https://openalex.org/W4403278717","https://openalex.org/W4404411631","https://openalex.org/W4405909140","https://openalex.org/W4408182386","https://openalex.org/W6739901393","https://openalex.org/W6751068846","https://openalex.org/W6755207826","https://openalex.org/W6767440493","https://openalex.org/W6847478871","https://openalex.org/W6854866820","https://openalex.org/W6862020115","https://openalex.org/W6868546719","https://openalex.org/W6870845358"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2111241003","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W2355315220","https://openalex.org/W4200391368","https://openalex.org/W2210979487"],"abstract_inverted_index":{"The":[0],"extremely":[1],"high":[2],"computational":[3],"and":[4,39,63,123,143,177,224],"storage":[5],"demands":[6],"of":[7,36,42,61,98,113,119,128],"large":[8,48,85],"language":[9,49,86],"models":[10],"have":[11],"excluded":[12],"most":[13],"edge":[14,30],"devices,":[15],"which":[16],"were":[17],"widely":[18],"used":[19],"for":[20,84,140,163,227],"efficient":[21,217],"machine":[22],"learning,":[23],"from":[24],"being":[25],"viable":[26],"options.":[27],"A":[28],"typical":[29],"device":[31],"usually":[32],"only":[33],"has":[34],"4GB":[35,97],"memory":[37,121,131,138],"capacity":[38,122,139],"a":[40,47,81,106,110,151,169,179,198,203],"bandwidth":[41,132,162],"less":[43],"than":[44],"20GB/s,":[45],"while":[46],"model":[50,87,141,164],"quantized":[51],"to":[52,75,196],"4-bit":[53],"precision":[54],"with":[55,96,172],"7B":[56,199],"parameters":[57],"already":[58],"requires":[59],"3.5GB":[60],"capacity,":[62],"its":[64],"decoding":[65,111,126],"process":[66],"is":[67],"purely":[68],"bandwidth-bound.":[69],"In":[70],"this":[71],"paper,":[72],"we":[73,146,167],"aim":[74],"explore":[76],"these":[77],"limits":[78],"by":[79],"proposing":[80],"hardware":[82],"accelerator":[83],"(LLM)":[88],"inference":[89,219],"on":[90,202,220],"the":[91,120,129,137,148,161,186,193],"Zynq-based":[92],"KV260":[93],"platform,":[94],"equipped":[95],"64-bit":[99],"2400Mbps":[100],"DDR4":[101],"memory.":[102],"We":[103],"successfully":[104],"deploy":[105,197],"LLaMA2-7B":[107],"model,":[108],"achieving":[109],"speed":[112,127],"around":[114],"5":[115],"token/s,":[116],"utilizing":[117],"93.3%":[118],"reaching":[124],"85%":[125],"theoretical":[130],"limit.":[133],"To":[134,158],"fully":[135,159],"reserve":[136,160],"weights":[142],"key-value":[144],"cache,":[145],"develop":[147],"system":[149],"in":[150],"bare-metal":[152],"environment":[153],"without":[154],"an":[155,173],"operating":[156],"system.":[157],"weight":[165],"transfers,":[166],"implement":[168],"customized":[170],"dataflow":[171],"operator":[174],"fusion":[175],"pipeline":[176],"propose":[178],"data":[180,187],"arrangement":[181],"format":[182],"that":[183],"can":[184],"maximize":[185],"transaction":[188],"efficiency.":[189],"This":[190],"research":[191],"marks":[192],"first":[194],"attempt":[195],"level":[200],"LLM":[201,218],"standalone":[204],"embedded":[205,221],"field":[206],"programmable":[207],"gate":[208],"array":[209],"(FPGA)":[210],"device.":[211],"It":[212],"provides":[213,225],"key":[214],"insights":[215],"into":[216],"FPGA":[222],"devices":[223],"guidelines":[226],"future":[228],"architecture":[229],"design.":[230]},"counts_by_year":[{"year":2025,"cited_by_count":7}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
