{"id":"https://openalex.org/W4409154894","doi":"https://doi.org/10.1109/tcad.2025.3558140","title":"Flex-SFU: Activation Function Acceleration With Nonuniform Piecewise Approximation","display_name":"Flex-SFU: Activation Function Acceleration With Nonuniform Piecewise Approximation","publication_year":2025,"publication_date":"2025-04-04","ids":{"openalex":"https://openalex.org/W4409154894","doi":"https://doi.org/10.1109/tcad.2025.3558140"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2025.3558140","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2025.3558140","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050957615","display_name":"Renzo Andri","orcid":"https://orcid.org/0000-0002-8776-5158"},"institutions":[{"id":"https://openalex.org/I4210146936","display_name":"Huawei Technologies (United States)","ror":"https://ror.org/03jyqk712","country_code":"US","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210146936"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Renzo Andri","raw_affiliation_strings":["Computing Systems Group, Huawei Zurich Research Center, Z&#x00FC;rich, Switzerland","Computing Systems Group, Huawei Zurich Research Center, Switzerland"],"raw_orcid":"https://orcid.org/0000-0002-8776-5158","affiliations":[{"raw_affiliation_string":"Computing Systems Group, Huawei Zurich Research Center, Z&#x00FC;rich, Switzerland","institution_ids":["https://openalex.org/I4210146936"]},{"raw_affiliation_string":"Computing Systems Group, Huawei Zurich Research Center, Switzerland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064165125","display_name":"Enrico Reggiani","orcid":"https://orcid.org/0000-0003-1385-7962"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I4210146936","display_name":"Huawei Technologies (United States)","ror":"https://ror.org/03jyqk712","country_code":"US","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210146936"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES","US"],"is_corresponding":false,"raw_author_name":"Enrico Reggiani","raw_affiliation_strings":["Computing Systems Laboratory, Huawei Zurich Research Center, Z&#x00FC;rich, Switzerland","Huawei Zurich Research Center, Switzerland, and Barcelona Supercomputing Center, Spain"],"raw_orcid":"https://orcid.org/0000-0003-1385-7962","affiliations":[{"raw_affiliation_string":"Computing Systems Laboratory, Huawei Zurich Research Center, Z&#x00FC;rich, Switzerland","institution_ids":["https://openalex.org/I4210146936"]},{"raw_affiliation_string":"Huawei Zurich Research Center, Switzerland, and Barcelona Supercomputing Center, Spain","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025399641","display_name":"Lukas Cavigelli","orcid":"https://orcid.org/0000-0003-1767-7715"},"institutions":[{"id":"https://openalex.org/I4210146936","display_name":"Huawei Technologies (United States)","ror":"https://ror.org/03jyqk712","country_code":"US","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210146936"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lukas Cavigelli","raw_affiliation_strings":["Computing Systems Group, Huawei Zurich Research Center, Z&#x00FC;rich, Switzerland","Computing Systems Group, Huawei Zurich Research Center, Switzerland"],"raw_orcid":"https://orcid.org/0000-0003-1767-7715","affiliations":[{"raw_affiliation_string":"Computing Systems Group, Huawei Zurich Research Center, Z&#x00FC;rich, Switzerland","institution_ids":["https://openalex.org/I4210146936"]},{"raw_affiliation_string":"Computing Systems Group, Huawei Zurich Research Center, Switzerland","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5352,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.64483247,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"44","issue":"11","first_page":"4236","last_page":"4248"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9365000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9365000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9190000295639038,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/flex","display_name":"FLEX","score":0.8571847677230835},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.7344905138015747},{"id":"https://openalex.org/keywords/piecewise","display_name":"Piecewise","score":0.66220623254776},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5956687331199646},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.5418307781219482},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.4716452956199646},{"id":"https://openalex.org/keywords/step-function","display_name":"Step function","score":0.4111771583557129},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.365734726190567},{"id":"https://openalex.org/keywords/classical-mechanics","display_name":"Classical mechanics","score":0.15869808197021484},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1540825366973877},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.09923246502876282}],"concepts":[{"id":"https://openalex.org/C2776252893","wikidata":"https://www.wikidata.org/wiki/Q1364836","display_name":"FLEX","level":2,"score":0.8571847677230835},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.7344905138015747},{"id":"https://openalex.org/C164660894","wikidata":"https://www.wikidata.org/wiki/Q2037833","display_name":"Piecewise","level":2,"score":0.66220623254776},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5956687331199646},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5418307781219482},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.4716452956199646},{"id":"https://openalex.org/C98458673","wikidata":"https://www.wikidata.org/wiki/Q917657","display_name":"Step function","level":2,"score":0.4111771583557129},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.365734726190567},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.15869808197021484},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1540825366973877},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.09923246502876282},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2025.3558140","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2025.3558140","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1890351280","https://openalex.org/W2000956074","https://openalex.org/W2042519306","https://openalex.org/W2073256350","https://openalex.org/W2120052858","https://openalex.org/W2141699138","https://openalex.org/W2167425979","https://openalex.org/W2169621283","https://openalex.org/W2194775991","https://openalex.org/W2618530766","https://openalex.org/W2896478499","https://openalex.org/W2922301133","https://openalex.org/W2953212265","https://openalex.org/W2962834855","https://openalex.org/W2974644665","https://openalex.org/W2982219368","https://openalex.org/W3023212902","https://openalex.org/W3036878841","https://openalex.org/W3043675702","https://openalex.org/W3054316493","https://openalex.org/W3091983754","https://openalex.org/W3092319144","https://openalex.org/W3133253223","https://openalex.org/W3160664745","https://openalex.org/W4200126970","https://openalex.org/W4283451911","https://openalex.org/W4283791586","https://openalex.org/W4307079523","https://openalex.org/W4386764151"],"related_works":["https://openalex.org/W4250698707","https://openalex.org/W4244545186","https://openalex.org/W4247748223","https://openalex.org/W1008394927","https://openalex.org/W183173419","https://openalex.org/W2353392568","https://openalex.org/W2390507441","https://openalex.org/W3184653708","https://openalex.org/W2366626956","https://openalex.org/W2324609237"],"abstract_inverted_index":{"Modern":[0],"Deep":[1],"Neural":[2],"Networks":[3],"(DNN)":[4],"increasingly":[5],"use":[6],"activation":[7,39,72,193],"functions":[8,40,194],"with":[9,147,156],"computationally":[10],"complex":[11],"operations.":[12],"This":[13,30,92,153],"creates":[14],"a":[15,34,82,97,144],"challenge":[16],"for":[17,25,38,70,191],"current":[18],"hardware":[19,36,83,137],"accelerators,":[20],"which":[21],"are":[22],"primarily":[23],"optimized":[24],"convolutions":[26],"and":[27,46,54,89,121,159,165],"matrix-matrix":[28],"multiplications.":[29],"work":[31],"introduces":[32],"Flex-SFU,":[33],"lightweight":[35],"accelerator":[37],"that":[41,126,178],"uses":[42],"nonuniform":[43],"piecewise":[44,107],"interpolation":[45,88,109,188],"supports":[47],"multiple":[48],"data":[49],"formats":[50],"including":[51],"both":[52],"linear":[53,108],"quadratic":[55],"function":[56,63],"segments.We":[57],"optimize":[58],"the":[59,132,170,187],"parameters":[60],"of":[61,135,162,189],"these":[62],"approximations":[64],"offline":[65],"to":[66,105,143,169,185],"provide":[67],"drop-in":[68],"replacements":[69],"existing":[71],"functions.":[73],"Flex-SFU":[74,127,179],"incorporates":[75],"an":[76,157],"address":[77],"decoding":[78],"unit":[79],"based":[80],"on":[81,95,114,129,150],"binary-tree":[84],"search,":[85],"enabling":[86],"non-uniform":[87],"floating-point":[90],"support.":[91],"approach":[93],"achieves,":[94],"average,":[96,130],"22.3x":[98],"improvement":[99,154,199],"in":[100],"mean":[101],"squared":[102],"error":[103],"compared":[104],"previous":[106],"methods.":[110],"Our":[111],"evaluations,":[112],"conducted":[113],"more":[115],"than":[116],"600":[117],"state-of-the-art":[118],"neural":[119],"networks":[120],"100":[122],"vision":[123],"transformers,":[124],"demonstrate":[125,177],"can,":[128],"enhance":[131],"end-to-end":[133],"performance":[134],"AI":[136],"accelerators":[138],"by":[139,183],"35.7%,":[140],"achieving":[141,197],"up":[142,184],"3.3\u00d7":[145],"speedup":[146],"negligible":[148],"impact":[149],"model":[151],"accuracy.":[152,207],"comes":[155],"area":[158],"power":[160],"overhead":[161],"only":[163],"5.9%":[164],"0.8%,":[166],"respectively,":[167],"relative":[168],"baseline":[171],"vector":[172],"processing":[173],"unit.":[174],"Additionally,":[175],"we":[176],"can":[180],"accelerate":[181],"training":[182],"15.8":[186],"derivatives":[190],"common":[192],"during":[195],"backpropagation,":[196],"this":[198],"without":[200],"impacting":[201],"either":[202],"convergence":[203],"speed":[204],"or":[205],"final":[206]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
