{"id":"https://openalex.org/W4409132079","doi":"https://doi.org/10.1109/hpec62836.2024.10938498","title":"GLITCHES: GPU-FPGA LLM Inference Through a Collaborative Heterogeneous System","display_name":"GLITCHES: GPU-FPGA LLM Inference Through a Collaborative Heterogeneous System","publication_year":2024,"publication_date":"2024-09-23","ids":{"openalex":"https://openalex.org/W4409132079","doi":"https://doi.org/10.1109/hpec62836.2024.10938498"},"language":"en","primary_location":{"id":"doi:10.1109/hpec62836.2024.10938498","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec62836.2024.10938498","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101858187","display_name":"Fan Yang","orcid":"https://orcid.org/0000-0002-4113-764X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fan Yang","raw_affiliation_strings":["Tsinghua University,BNRist,Dept. of EE"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,BNRist,Dept. of EE","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077505315","display_name":"Xinhao Yang","orcid":"https://orcid.org/0000-0002-0873-4514"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinhao Yang","raw_affiliation_strings":["Tsinghua University,BNRist,Dept. of EE"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,BNRist,Dept. of EE","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100701041","display_name":"Hongyi Wang","orcid":"https://orcid.org/0009-0006-0034-0074"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongyi Wang","raw_affiliation_strings":["Tsinghua University,BNRist,Dept. of EE"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,BNRist,Dept. of EE","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003892449","display_name":"Zehao Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zehao Wang","raw_affiliation_strings":["Tsinghua University,BNRist,Dept. of EE"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,BNRist,Dept. of EE","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103138440","display_name":"Zhenhua Zhu","orcid":"https://orcid.org/0009-0007-9259-7180"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Zhu","raw_affiliation_strings":["Tsinghua University,BNRist,Dept. of EE"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,BNRist,Dept. of EE","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026831784","display_name":"Shulin Zeng","orcid":"https://orcid.org/0000-0002-1030-3748"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shulin Zeng","raw_affiliation_strings":["Tsinghua University,BNRist,Dept. of EE"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,BNRist,Dept. of EE","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100445098","display_name":"Yu Wang","orcid":"https://orcid.org/0000-0001-7959-3387"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Wang","raw_affiliation_strings":["Tsinghua University,BNRist,Dept. of EE"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,BNRist,Dept. of EE","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101858187"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.7372,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.76104064,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9323999881744385,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9323999881744385,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.920199990272522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.8164680004119873},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7669816017150879},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.686702311038971},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4875306785106659},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4398070275783539},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.4309694766998291},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.39796310663223267},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.2669484615325928},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23174205422401428}],"concepts":[{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.8164680004119873},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7669816017150879},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.686702311038971},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4875306785106659},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4398070275783539},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4309694766998291},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.39796310663223267},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2669484615325928},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23174205422401428},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec62836.2024.10938498","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec62836.2024.10938498","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4000000059604645}],"awards":[{"id":"https://openalex.org/G536078878","display_name":null,"funder_award_id":"62325405,62104128,U21B2031,62204164","funder_id":"https://openalex.org/F4320321002","funder_display_name":"Research Promotion Foundation"}],"funders":[{"id":"https://openalex.org/F4320321002","display_name":"Research Promotion Foundation","ror":"https://ror.org/00en9ce74"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W3034855459","https://openalex.org/W4308083513","https://openalex.org/W4321636575","https://openalex.org/W4384561707","https://openalex.org/W4385245566","https://openalex.org/W4387321091","https://openalex.org/W4392240262","https://openalex.org/W4393578753","https://openalex.org/W4393949386","https://openalex.org/W4394745423","https://openalex.org/W6798182279","https://openalex.org/W6838322825","https://openalex.org/W6838461927","https://openalex.org/W6850625674","https://openalex.org/W6850927664","https://openalex.org/W6852818246","https://openalex.org/W6853516335","https://openalex.org/W6854475153","https://openalex.org/W6857972656","https://openalex.org/W6865331541","https://openalex.org/W6870207544"],"related_works":["https://openalex.org/W2505380084","https://openalex.org/W2111241003","https://openalex.org/W4400333498","https://openalex.org/W2355315220","https://openalex.org/W4200391368","https://openalex.org/W2210979487","https://openalex.org/W1967938402","https://openalex.org/W1980160788","https://openalex.org/W2386041993","https://openalex.org/W1608572506"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"demonstrate":[4,151],"strong":[5],"capabilities":[6],"across":[7],"various":[8],"tasks.":[9],"However,":[10],"in":[11,59,66,172,180],"latency-sensitive":[12],"scenarios,":[13],"a":[14,47,89,153,168,176,185],"small":[15],"batch":[16,20],"or":[17,50],"even":[18],"one":[19],"is":[21,44],"usually":[22],"required.":[23],"This":[24,81],"leads":[25],"to":[26,53,136,184],"the":[27,30,76,98,109,115,119,132,139,145],"prefill":[28,68,110],"and":[29,38,72,103,105,112,123,163,175],"decode":[31,77,116,146],"stage":[32,78,111],"of":[33,62,101,121],"LLM":[34,63,86,156],"inference":[35,87,157],"being":[36],"computational":[37,57],"memory":[39,141],"bottlenecks,":[40],"respectively.":[41],"Therefore,":[42],"it":[43],"difficult":[45],"for":[46,108,114],"homogeneous":[48,186],"FPGA":[49],"GPU":[51,162],"system":[52,158,173,187],"simultaneously":[54],"address":[55],"different":[56,60,99],"bottlenecks":[58],"stages":[61],"inference,":[64],"resulting":[65],"long":[67],"latency":[69],"on":[70,79,126,148],"FPGAs":[71,104,113,166],"low":[73],"utilization":[74,143],"during":[75,144],"GPUs.":[80,191],"paper":[82],"proposes":[83],"GLITCHES,":[84],"GPU-FPGA":[85],"through":[88],"collaborative":[90],"heterogeneous":[91,155],"system.":[92],"In":[93],"this":[94],"paper,":[95],"we":[96,130],"analyze":[97],"characteristics":[100],"GPUs":[102,107,122],"employ":[106],"stage,":[117],"leveraging":[118],"strengths":[120],"FPGAs.":[124,149],"Based":[125],"HBM":[127],"profiling":[128],"results,":[129],"apply":[131],"data":[133],"prefetching":[134],"technique":[135],"further":[137],"improve":[138],"off-chip":[140],"bandwidth":[142],"computations":[147],"Experiments":[150],"that":[152],"GLITCHES":[154],"with":[159,188],"an":[160],"A100":[161],"seven":[164],"U280":[165],"achieves":[167],"1.28/1.34":[169],"times":[170,178],"improvement":[171,179],"throughput":[174],"2.38/1.90":[177],"cost":[181],"efficiency":[182],"compared":[183],"8-card":[189],"A100/V100S":[190]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-04T07:04:00.330322","created_date":"2025-10-10T00:00:00"}
