{"id":"https://openalex.org/W4411688446","doi":"https://doi.org/10.1109/arith64983.2025.00016","title":"VEXP: A Low-Cost RISC-V ISA Extension for Accelerated Softmax Computation in Transformers","display_name":"VEXP: A Low-Cost RISC-V ISA Extension for Accelerated Softmax Computation in Transformers","publication_year":2025,"publication_date":"2025-05-04","ids":{"openalex":"https://openalex.org/W4411688446","doi":"https://doi.org/10.1109/arith64983.2025.00016"},"language":"en","primary_location":{"id":"doi:10.1109/arith64983.2025.00016","is_oa":false,"landing_page_url":"https://doi.org/10.1109/arith64983.2025.00016","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 32nd Symposium on Computer Arithmetic (ARITH)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008817923","display_name":"Run Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Run Wang","raw_affiliation_strings":["IIS, ETH,Zurich,Switzerland"],"affiliations":[{"raw_affiliation_string":"IIS, ETH,Zurich,Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073926165","display_name":"Gamze \u0130slamo\u011flu","orcid":"https://orcid.org/0000-0002-5129-1691"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Gamze Islamoglu","raw_affiliation_strings":["IIS, ETH,Zurich,Switzerland"],"affiliations":[{"raw_affiliation_string":"IIS, ETH,Zurich,Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115087249","display_name":"Andrea Belano","orcid":null},"institutions":[{"id":"https://openalex.org/I9360294","display_name":"University of Bologna","ror":"https://ror.org/01111rn36","country_code":"IT","type":"education","lineage":["https://openalex.org/I9360294"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Andrea Belano","raw_affiliation_strings":["University of Bologna,DEI,Italy"],"affiliations":[{"raw_affiliation_string":"University of Bologna,DEI,Italy","institution_ids":["https://openalex.org/I9360294"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062172879","display_name":"Viviane Potocnik","orcid":"https://orcid.org/0009-0004-9412-6081"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Viviane Potocnik","raw_affiliation_strings":["IIS, ETH,Zurich,Switzerland"],"affiliations":[{"raw_affiliation_string":"IIS, ETH,Zurich,Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038717922","display_name":"Francesco Conti","orcid":"https://orcid.org/0000-0002-7924-933X"},"institutions":[{"id":"https://openalex.org/I9360294","display_name":"University of Bologna","ror":"https://ror.org/01111rn36","country_code":"IT","type":"education","lineage":["https://openalex.org/I9360294"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Francesco Conti","raw_affiliation_strings":["University of Bologna,DEI,Italy"],"affiliations":[{"raw_affiliation_string":"University of Bologna,DEI,Italy","institution_ids":["https://openalex.org/I9360294"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052915995","display_name":"Angelo Garofalo","orcid":"https://orcid.org/0000-0002-7495-6895"},"institutions":[{"id":"https://openalex.org/I9360294","display_name":"University of Bologna","ror":"https://ror.org/01111rn36","country_code":"IT","type":"education","lineage":["https://openalex.org/I9360294"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Angelo Garofalo","raw_affiliation_strings":["University of Bologna,DEI,Italy"],"affiliations":[{"raw_affiliation_string":"University of Bologna,DEI,Italy","institution_ids":["https://openalex.org/I9360294"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5106662582","display_name":"Luca Bonini","orcid":null},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Luca Bonini","raw_affiliation_strings":["IIS, ETH,Zurich,Switzerland"],"affiliations":[{"raw_affiliation_string":"IIS, ETH,Zurich,Switzerland","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5008817923"],"corresponding_institution_ids":["https://openalex.org/I35440088"],"apc_list":null,"apc_paid":null,"fwci":1.5927,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.84766532,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"37","last_page":"44"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10558","display_name":"Advancements in Semiconductor Devices and Circuit Design","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10558","display_name":"Advancements in Semiconductor Devices and Circuit Design","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9649999737739563,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12692","display_name":"Magnetic Field Sensors Techniques","score":0.9599999785423279,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6262494325637817},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.603676974773407},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5974475741386414},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.58304363489151},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.280592679977417},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.18868035078048706},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10799646377563477},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.07266876101493835}],"concepts":[{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6262494325637817},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.603676974773407},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5974475741386414},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.58304363489151},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.280592679977417},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.18868035078048706},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10799646377563477},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.07266876101493835},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/arith64983.2025.00016","is_oa":false,"landing_page_url":"https://doi.org/10.1109/arith64983.2025.00016","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 32nd Symposium on Computer Arithmetic (ARITH)","raw_type":"proceedings-article"},{"id":"pmh:oai:cris.unibo.it:11585/1039995","is_oa":false,"landing_page_url":"https://hdl.handle.net/11585/1039995","pdf_url":null,"source":{"id":"https://openalex.org/S4306402579","display_name":"Archivio istituzionale della ricerca (Alma Mater Studiorum Universit\u00e0 di Bologna)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210117483","host_organization_name":"Istituto di Ematologia di Bologna","host_organization_lineage":["https://openalex.org/I4210117483"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5400000214576721,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2137269967","https://openalex.org/W2903640996","https://openalex.org/W2981849318","https://openalex.org/W3013692244","https://openalex.org/W3016309475","https://openalex.org/W3036061420","https://openalex.org/W3043675702","https://openalex.org/W3211525823","https://openalex.org/W4293025109","https://openalex.org/W4312360312","https://openalex.org/W4385080022","https://openalex.org/W4385245566","https://openalex.org/W4386859272","https://openalex.org/W4389166736","https://openalex.org/W4391801288","https://openalex.org/W4400315200","https://openalex.org/W4401880104","https://openalex.org/W4402159576","https://openalex.org/W4402349404","https://openalex.org/W4402451456","https://openalex.org/W4402753294","https://openalex.org/W4409307140","https://openalex.org/W4409640809"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W3107204728","https://openalex.org/W4287591324","https://openalex.org/W3108503355","https://openalex.org/W3090555870","https://openalex.org/W4226420367","https://openalex.org/W2962876041","https://openalex.org/W3022820045"],"abstract_inverted_index":{"While":[0],"Transformers":[1],"are":[2],"dominated":[3],"by":[4],"Floating-Point":[5,72],"(FP)":[6],"Matrix-Multiplications,":[7],"their":[8],"aggressive":[9],"acceleration":[10],"through":[11,85],"dedicated":[12],"hardware":[13],"or":[14],"many-core":[15],"programmable":[16],"systems":[17],"has":[18],"shifted":[19],"the":[20,41,71,76,102,107,122,138,145],"performance":[21,129],"bottleneck":[22],"to":[23,33,105,121,152,169],"non-linear":[24,36],"functions":[25],"like":[26],"Softmax.":[27],"Accelerating":[28],"Softmax":[29,111],"is":[30],"challenging":[31],"due":[32],"its":[34],"non-pointwise,":[35],"nature,":[37],"with":[38,92,112,186],"exponentiation":[39,56],"as":[40,162],"most":[42],"demanding":[43],"step.":[44],"To":[45],"address":[46],"this,":[47],"we":[48,67,109],"design":[49],"a":[50,58,82,93,149],"custom":[51,86],"arithmetic":[52],"block":[53],"for":[54,137],"Bfloat16":[55],"leveraging":[57],"novel":[59],"approximation":[60],"algorithm":[61],"based":[62],"on":[63],"Schraudolph's":[64],"method,":[65],"and":[66,116,131,165,172,178,185],"integrate":[68],"it":[69],"into":[70],"Unit":[73],"(FPU)":[74],"of":[75,81,97,157],"RISC-":[77],"V":[78],"cores":[79],"[1]":[80],"compute":[83],"cluster,":[84,124],"Instruction":[87],"Set":[88],"Architecture":[89],"(ISA)":[90],"extensions,":[91],"negligible":[94,187],"area":[95],"overhead":[96],"1":[98],"%.":[99],"By":[100],"optimizing":[101],"software":[103],"kernels":[104],"leverage":[106],"extension,":[108],"execute":[110,154],"162.7x":[113],"less":[114,118],"latency":[115,177],"74.3x":[117],"energy":[119,135,179],"compared":[120],"baseline":[123],"achieving":[125,167],"an":[126],"8.2":[127],"x":[128,133,171,174],"improvement":[130],"4.1":[132],"higher":[134],"efficiency":[136],"FlashAttention-2":[139],"kernel":[140],"in":[141,176],"GPT-2":[142],"configuration.":[143],"Moreover,":[144],"proposed":[146],"approach":[147],"enables":[148],"multi-cluster":[150],"system":[151],"efficiently":[153],"end-to-end":[155],"inference":[156],"pre-trained":[158],"Transformer":[159],"models,":[160],"such":[161],"GPT-2,":[163],"GPT-3":[164],"ViT,":[166],"up":[168],"5.8":[170],"3.6":[173],"reduction":[175],"consumption,":[180],"respectively,":[181],"without":[182],"requiring":[183],"re-training":[184],"accuracy":[188],"loss.":[189]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
