{"id":"https://openalex.org/W7123352536","doi":"https://doi.org/10.1109/candar68384.2025.00018","title":"Energy-Efficient Hardware Acceleration of Whisper ASR on a CGLA","display_name":"Energy-Efficient Hardware Acceleration of Whisper ASR on a CGLA","publication_year":2025,"publication_date":"2025-11-25","ids":{"openalex":"https://openalex.org/W7123352536","doi":"https://doi.org/10.1109/candar68384.2025.00018"},"language":null,"primary_location":{"id":"doi:10.1109/candar68384.2025.00018","is_oa":false,"landing_page_url":"https://doi.org/10.1109/candar68384.2025.00018","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Thirteenth International Symposium on Computing and Networking (CANDAR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072649554","display_name":"Takuto Ando","orcid":"https://orcid.org/0000-0001-5749-7730"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takuto Ando","raw_affiliation_strings":["Nara Institute of Science and Technology,Nara,Japan,630-0192"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology,Nara,Japan,630-0192","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122903701","display_name":"Yu Eto Ayumu Takeuchi","orcid":null},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yu Eto Ayumu Takeuchi","raw_affiliation_strings":["Nara Institute of Science and Technology,Nara,Japan,630-0192"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology,Nara,Japan,630-0192","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061486185","display_name":"Yasuhiko NAKASHIMA","orcid":null},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yasuhiko Nakashima","raw_affiliation_strings":["Nara Institute of Science and Technology,Nara,Japan,630-0192"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology,Nara,Japan,630-0192","institution_ids":["https://openalex.org/I75917431"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5072649554"],"corresponding_institution_ids":["https://openalex.org/I75917431"],"apc_list":null,"apc_paid":null,"fwci":2.4166,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.91970033,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"85","last_page":"91"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.3059000074863434,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.3059000074863434,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.13519999384880066,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.07660000026226044,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/application-specific-integrated-circuit","display_name":"Application-specific integrated circuit","score":0.7038000226020813},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6108999848365784},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5914999842643738},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.5529000163078308},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5189999938011169},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.49709999561309814},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.48829999566078186},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.4675999879837036},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.4377000033855438}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7483000159263611},{"id":"https://openalex.org/C77390884","wikidata":"https://www.wikidata.org/wiki/Q217302","display_name":"Application-specific integrated circuit","level":2,"score":0.7038000226020813},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6108999848365784},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5914999842643738},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.5752000212669373},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.5529000163078308},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5189999938011169},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.49709999561309814},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.48829999566078186},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.4675999879837036},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4542999863624573},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.4377000033855438},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.37470000982284546},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.3601999878883362},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.3555999994277954},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.34290000796318054},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.31940001249313354},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.314300000667572},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.31029999256134033},{"id":"https://openalex.org/C3020431745","wikidata":"https://www.wikidata.org/wiki/Q25325220","display_name":"Many core","level":2,"score":0.3091999888420105},{"id":"https://openalex.org/C2982832238","wikidata":"https://www.wikidata.org/wiki/Q5531640","display_name":"General purpose","level":2,"score":0.2922999858856201},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.287200003862381},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C165005293","wikidata":"https://www.wikidata.org/wiki/Q1074500","display_name":"Chip","level":2,"score":0.2709999978542328},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C118021083","wikidata":"https://www.wikidata.org/wiki/Q610398","display_name":"System on a chip","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/candar68384.2025.00018","is_oa":false,"landing_page_url":"https://doi.org/10.1109/candar68384.2025.00018","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Thirteenth International Symposium on Computing and Networking (CANDAR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.7828821539878845}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,103],"rise":[1],"of":[2],"generative":[3],"AI":[4],"for":[5,93,126,138],"tasks":[6],"like":[7],"Automatic":[8],"Speech":[9],"Recognition":[10],"(ASR)":[11],"has":[12],"created":[13],"a":[14,48,65,69,94,135],"critical":[15],"energy":[16,101],"consumption":[17],"challenge.":[18],"While":[19],"ASICs":[20],"offer":[21],"high":[22],"efficiency,":[23],"they":[24],"lack":[25],"the":[26,46,60,112,127],"programmability":[27],"to":[28,30,63],"adapt":[29],"evolving":[31],"algorithms.":[32],"To":[33,55],"address":[34],"this":[35,58],"trade-off,":[36],"we":[37,82],"implement":[38],"and":[39,71,77,90,117],"evaluate":[40,83],"Whisper\u2019s":[41],"core":[42],"computational":[43],"kernel":[44,67],"on":[45,68,141],"IMAX,":[47],"general-purpose":[49],"Coarse-Grained":[50],"Linear":[51],"Arrays":[52],"(CGLAs)":[53],"accelerator.":[54],"our":[56,84],"knowledge,":[57],"is":[59,106],"first":[61],"work":[62,131],"execute":[64],"Whisper":[66],"CGRA":[70],"compare":[72],"its":[73],"performance":[74,92],"against":[75],"CPUs":[76],"GPUs.":[78],"Using":[79],"hardware/software":[80],"co-design,":[81],"system":[85],"via":[86],"an":[87,122],"FPGA":[88],"prototype":[89],"project":[91],"28nm":[95],"ASIC.":[96],"Our":[97],"results":[98],"demonstrate":[99],"superior":[100],"efficiency.":[102],"projected":[104],"ASIC":[105],"1.90":[107],"\u00d7":[108,119],"more":[109,120],"energy-efficient":[110],"than":[111,121],"NVIDIA":[113,123],"Jetson":[114],"AGX":[115],"Orin":[116],"9.83":[118],"RTX":[124],"4090":[125],"Q8_0":[128],"model.":[129],"This":[130],"positions":[132],"CGLA":[133],"as":[134],"promising":[136],"platform":[137],"sustainable":[139],"ASR":[140],"power-constrained":[142],"edge":[143],"devices.":[144]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2026-01-14T00:00:00"}
