{"id":"https://openalex.org/W4413755243","doi":"https://doi.org/10.1109/isvlsi65124.2025.11130263","title":"Low-Cost FlashAttention with Fused Exponential and Multiplication Hardware Operators","display_name":"Low-Cost FlashAttention with Fused Exponential and Multiplication Hardware Operators","publication_year":2025,"publication_date":"2025-07-06","ids":{"openalex":"https://openalex.org/W4413755243","doi":"https://doi.org/10.1109/isvlsi65124.2025.11130263"},"language":"en","primary_location":{"id":"doi:10.1109/isvlsi65124.2025.11130263","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isvlsi65124.2025.11130263","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Computer Society Annual Symposium on VLSI (ISVLSI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111262390","display_name":"Kosmas Alexandridis","orcid":null},"institutions":[{"id":"https://openalex.org/I147962203","display_name":"Democritus University of Thrace","ror":"https://ror.org/03bfqnx40","country_code":"GR","type":"education","lineage":["https://openalex.org/I147962203"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Kosmas Alexandridis","raw_affiliation_strings":["Democritus University of Thrace,Integrated Circuits Lab Electrical and Computer Engineering,Xanthi,Greece"],"affiliations":[{"raw_affiliation_string":"Democritus University of Thrace,Integrated Circuits Lab Electrical and Computer Engineering,Xanthi,Greece","institution_ids":["https://openalex.org/I147962203"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093264909","display_name":"Vasileios Titopoulos","orcid":"https://orcid.org/0009-0009-0123-5737"},"institutions":[{"id":"https://openalex.org/I147962203","display_name":"Democritus University of Thrace","ror":"https://ror.org/03bfqnx40","country_code":"GR","type":"education","lineage":["https://openalex.org/I147962203"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Vasileios Titopoulos","raw_affiliation_strings":["Democritus University of Thrace,Integrated Circuits Lab Electrical and Computer Engineering,Xanthi,Greece"],"affiliations":[{"raw_affiliation_string":"Democritus University of Thrace,Integrated Circuits Lab Electrical and Computer Engineering,Xanthi,Greece","institution_ids":["https://openalex.org/I147962203"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074704256","display_name":"Giorgos Dimitrakopoulos","orcid":"https://orcid.org/0000-0003-3688-7865"},"institutions":[{"id":"https://openalex.org/I147962203","display_name":"Democritus University of Thrace","ror":"https://ror.org/03bfqnx40","country_code":"GR","type":"education","lineage":["https://openalex.org/I147962203"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Giorgos Dimitrakopoulos","raw_affiliation_strings":["Democritus University of Thrace,Integrated Circuits Lab Electrical and Computer Engineering,Xanthi,Greece"],"affiliations":[{"raw_affiliation_string":"Democritus University of Thrace,Integrated Circuits Lab Electrical and Computer Engineering,Xanthi,Greece","institution_ids":["https://openalex.org/I147962203"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5111262390"],"corresponding_institution_ids":["https://openalex.org/I147962203"],"apc_list":null,"apc_paid":null,"fwci":2.9117,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.92343514,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9527000188827515,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9241999983787537,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.7216288447380066},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5877806544303894},{"id":"https://openalex.org/keywords/exponential-function","display_name":"Exponential function","score":0.5440716743469238},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.34802621603012085},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.33358272910118103},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2118617594242096}],"concepts":[{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.7216288447380066},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5877806544303894},{"id":"https://openalex.org/C151376022","wikidata":"https://www.wikidata.org/wiki/Q168698","display_name":"Exponential function","level":2,"score":0.5440716743469238},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.34802621603012085},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.33358272910118103},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2118617594242096},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isvlsi65124.2025.11130263","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isvlsi65124.2025.11130263","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Computer Society Annual Symposium on VLSI (ISVLSI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322923","display_name":"Siemens","ror":"https://ror.org/059mq0909"},{"id":"https://openalex.org/F4320330618","display_name":"Infineon Technologies","ror":"https://ror.org/005kw6t15"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2057797376","https://openalex.org/W2090319426","https://openalex.org/W2036954759","https://openalex.org/W2506252583","https://openalex.org/W4406620725","https://openalex.org/W2048249848"],"abstract_inverted_index":{"Attention":[0],"mechanisms,":[1],"particularly":[2],"within":[3],"Transformer":[4],"architectures":[5,144],"and":[6,18,69,95,110,133,148],"large":[7],"language":[8],"models":[9],"(LLMs),":[10],"have":[11,31],"revolutionized":[12],"sequence":[13],"modeling":[14],"in":[15,40,119,131,136],"machine":[16],"learning":[17],"artificial":[19],"intelligence":[20],"applications.":[21],"To":[22],"compute":[23],"attention":[24,37],"for":[25,58],"increasingly":[26],"long":[27],"sequences,":[28],"specialized":[29],"accelerators":[30],"been":[32],"proposed":[33,46,102],"to":[34,63,141],"execute":[35],"key":[36],"steps":[38],"directly":[39],"hardware.":[41],"Among":[42],"the":[43,53,80,91,108],"various":[44],"recently":[45],"architectures,":[47],"those":[48],"based":[49],"on":[50,78,138],"variants":[51],"of":[52,82,93,113,128],"FlashAttention":[54,84],"algorithm,":[55],"originally":[56],"designed":[57],"GPUs,":[59],"stand":[60],"out":[61],"due":[62],"their":[64],"optimized":[65],"computation,":[66],"tiling":[67],"capabilities,":[68],"reduced":[70],"memory":[71],"traffic.":[72],"In":[73],"this":[74],"work,":[75],"we":[76],"focus":[77],"optimizing":[79],"kernel":[81],"floating-point-based":[83],"using":[85],"new":[86],"hardware":[87,104,115,143,151],"operators":[88,105],"that":[89],"fuse":[90],"computation":[92],"exponentials":[94,147],"vector":[96,149],"multiplications,":[97],"e.g.,":[98],"$e^{x}":[99],"V$.":[100],"The":[101],"ExpMul":[103],"significantly":[106],"reduce":[107],"area":[109,132],"power":[111],"costs":[112],"FlashAttention-based":[114],"accelerators.":[116],"When":[117],"implemented":[118],"a":[120],"28":[121],"nm":[122],"ASIC":[123],"technology,":[124],"they":[125],"achieve":[126],"improvements":[127],"$28.8":[129],"\\%$":[130,135],"$17.6":[134],"power,":[137],"average,":[139],"compared":[140],"state-of-the-art":[142],"with":[145],"separate":[146],"multiplications":[150],"operators.":[152]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
