{"id":"https://openalex.org/W7127881893","doi":"https://doi.org/10.1145/3748173.3779191","title":"TeLLMe: An Efficient End-to-End Ternary LLM Prefill and Decode Accelerator with Table-Lookup Matmul on Edge FPGAs","display_name":"TeLLMe: An Efficient End-to-End Ternary LLM Prefill and Decode Accelerator with Table-Lookup Matmul on Edge FPGAs","publication_year":2026,"publication_date":"2026-02-05","ids":{"openalex":"https://openalex.org/W7127881893","doi":"https://doi.org/10.1145/3748173.3779191"},"language":null,"primary_location":{"id":"doi:10.1145/3748173.3779191","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3748173.3779191","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3748173.3779191","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101293879","display_name":"Ye Qiao","orcid":null},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ye Qiao","raw_affiliation_strings":["University of California, Irvine, Irvine, California, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Irvine, Irvine, California, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123387918","display_name":"Zhiheng Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiheng Chen","raw_affiliation_strings":["University of California, Irvine, Irvine, California, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Irvine, Irvine, California, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094333359","display_name":"Yifan Zhang","orcid":"https://orcid.org/0009-0002-7315-8151"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yifan Zhang","raw_affiliation_strings":["University of California, Irvine, Irvine, California, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Irvine, Irvine, California, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123394653","display_name":"Yian Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yian Wang","raw_affiliation_strings":["University of California, Irvine, Irvine, California, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Irvine, Irvine, California, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123429052","display_name":"Sitao Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sitao Huang","raw_affiliation_strings":["University of California, Irvine, Irvine, California, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Irvine, Irvine, California, USA","institution_ids":["https://openalex.org/I204250578"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101293879"],"corresponding_institution_ids":["https://openalex.org/I204250578"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.61344196,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"247","last_page":"257"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.3052000105381012,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.3052000105381012,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.10599999874830246,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.09369999915361404,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.6261000037193298},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.592199981212616},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5072000026702881},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5041000247001648},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.46869999170303345},{"id":"https://openalex.org/keywords/precomputation","display_name":"Precomputation","score":0.4562000036239624},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.45159998536109924},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.4481000006198883},{"id":"https://openalex.org/keywords/control-reconfiguration","display_name":"Control reconfiguration","score":0.4106999933719635}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7986000180244446},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.6261000037193298},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.592199981212616},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5072000026702881},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5041000247001648},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.46869999170303345},{"id":"https://openalex.org/C159379195","wikidata":"https://www.wikidata.org/wiki/Q7239568","display_name":"Precomputation","level":3,"score":0.4562000036239624},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.45159998536109924},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.4481000006198883},{"id":"https://openalex.org/C119701452","wikidata":"https://www.wikidata.org/wiki/Q5165881","display_name":"Control reconfiguration","level":2,"score":0.4106999933719635},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3959999978542328},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.37929999828338623},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.365200012922287},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.3637000024318695},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.36059999465942383},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.3467000126838684},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.34599998593330383},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.33250001072883606},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3253999948501587},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.3192000091075897},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.30649998784065247},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.29840001463890076},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.2874999940395355},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.28290000557899475},{"id":"https://openalex.org/C82687282","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Auxiliary memory","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C130795937","wikidata":"https://www.wikidata.org/wiki/Q2561570","display_name":"Remote direct memory access","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.25220000743865967}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3748173.3779191","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3748173.3779191","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3748173.3779191","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3748173.3779191","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2951537853","https://openalex.org/W2996485093","https://openalex.org/W3093982621","https://openalex.org/W4313564414","https://openalex.org/W4317795323","https://openalex.org/W4389162698","https://openalex.org/W4393578753","https://openalex.org/W4407953506","https://openalex.org/W4408182386","https://openalex.org/W4415590998"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,66,71,77],"emergence":[2],"of":[3,27,70],"wearable":[4],"devices":[5],"and":[6,31,65,92,98,119,126,143,178,199],"other":[7],"embedded":[8],"systems,":[9],"deploying":[10],"large":[11],"language":[12],"models":[13],"(LLMs)":[14],"on":[15,217],"edge":[16,54,85,218],"platforms":[17],"becomes":[18],"an":[19],"urgent":[20],"need.":[21],"However,":[22],"it":[23],"is":[24,56],"challenging":[25],"because":[26],"their":[28],"high":[29,127,175],"computational":[30],"memory":[32,142,176],"demands.":[33],"Although":[34],"recent":[35],"low-bitwidth":[36],"quantization":[37],"methods":[38],"(e.g.,":[39],"BitNet,":[40],"DeepSeek)":[41],"compress":[42],"weights":[43,97],"to":[44,161,194,201],"as":[45,47],"low":[46,123],"1.58":[48],"bits":[49],"with":[50,158,170],"minimal":[51],"accuracy":[52],"loss,":[53],"deployment":[55],"still":[57],"constrained":[58],"by":[59],"limited":[60],"on-chip":[61],"resources,":[62],"power":[63,189],"budgets,":[64],"often-neglected":[67],"long":[68],"latency":[69],"prefill":[72,91,167],"stage.":[73],"We":[74],"present":[75],"TeLLMe,":[76],"first":[78],"table-lookup-based":[79,110],"ternary":[80,111],"LLM":[81,215],"accelerator":[82],"for":[83,122,174,205],"low-power":[84],"FPGAs":[86],"that":[87,153],"fully":[88],"supports":[89],"both":[90],"autoregressive":[93],"decoding":[94,183,197],"using":[95],"1.58-bit":[96],"8-bit":[99],"activations.":[100],"TeLLMe":[101,191],"incorporates":[102],"our":[103],"proposed":[104],"novel":[105],"techniques":[106],"including":[107],"(1)":[108],"a":[109,130,149,165,180,187,210],"matrix":[112],"multiplication":[113],"(TLMM)":[114],"engine":[115,145],"utilizing":[116],"grouped":[117],"activations":[118],"online":[120],"precomputation":[121],"resource":[124],"utilization":[125],"throughput;":[128],"(2)":[129],"fine-grained":[131],"URAM-based":[132],"weight":[133,138,146],"buffer":[134],"management":[135],"scheme":[136],"supporting":[137],"loading":[139],"from":[140],"global":[141],"compute":[144],"access;":[147],"(3)":[148],"streaming":[150],"dataflow":[151],"architecture":[152],"fuses":[154],"floating-point":[155],"element-wise":[156],"operations":[157],"linear":[159],"computations":[160],"hide":[162],"latency;":[163],"(4)":[164],"reversed-reordered":[166],"stage":[168,184],"attention":[169,172],"fused":[171],"operation":[173],"efficiency;":[177],"(5)":[179],"resource-efficient":[181],"specialized":[182],"attention.":[185],"Under":[186],"5W":[188],"budget,":[190],"delivers":[192],"up":[193],"25":[195],"tokens/s":[196],"throughput":[198],"0.45s":[200],"0.96s":[202],"Time-to-First-Token":[203],"(TTFT)":[204],"64\u2013128":[206],"token":[207],"prompts,":[208],"marking":[209],"significant":[211],"energy-efficiency":[212],"advancement":[213],"in":[214],"inference":[216],"FPGAs.":[219]},"counts_by_year":[],"updated_date":"2026-02-07T06:15:42.627816","created_date":"2026-02-07T00:00:00"}
