{"id":"https://openalex.org/W4400810916","doi":"https://doi.org/10.1109/aicas59952.2024.10595857","title":"A Low-Latency and Scalable Vector Engine with Operation Fusion for Transformers","display_name":"A Low-Latency and Scalable Vector Engine with Operation Fusion for Transformers","publication_year":2024,"publication_date":"2024-04-22","ids":{"openalex":"https://openalex.org/W4400810916","doi":"https://doi.org/10.1109/aicas59952.2024.10595857"},"language":"en","primary_location":{"id":"doi:10.1109/aicas59952.2024.10595857","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aicas59952.2024.10595857","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 6th International Conference on AI Circuits and Systems (AICAS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072544388","display_name":"Mincheol Cha","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Mincheol Cha","raw_affiliation_strings":["Seoul National University,Inter-University Semiconductor Research Center (ISRC),Department of Electrical and Computer Engineering,Seoul,Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University,Inter-University Semiconductor Research Center (ISRC),Department of Electrical and Computer Engineering,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113296003","display_name":"Keehyuk Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Keehyuk Lee","raw_affiliation_strings":["Seoul National University,Inter-University Semiconductor Research Center (ISRC),Department of Electrical and Computer Engineering,Seoul,Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University,Inter-University Semiconductor Research Center (ISRC),Department of Electrical and Computer Engineering,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014900695","display_name":"Xuan Truong Nguyen","orcid":"https://orcid.org/0000-0002-7527-6971"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Xuan Truong Nguyen","raw_affiliation_strings":["Seoul National University,Inter-University Semiconductor Research Center (ISRC),Department of Electrical and Computer Engineering,Seoul,Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University,Inter-University Semiconductor Research Center (ISRC),Department of Electrical and Computer Engineering,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024723558","display_name":"Hyuk\u2010Jae Lee","orcid":"https://orcid.org/0000-0001-6811-9647"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyuk-Jae Lee","raw_affiliation_strings":["Seoul National University,Inter-University Semiconductor Research Center (ISRC),Department of Electrical and Computer Engineering,Seoul,Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University,Inter-University Semiconductor Research Center (ISRC),Department of Electrical and Computer Engineering,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5072544388"],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":null,"apc_paid":null,"fwci":0.7274,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.74908756,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"307","last_page":"311"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9659000039100647,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9659000039100647,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.655104398727417},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5933193564414978},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.519977867603302},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.47945988178253174},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.41462084650993347},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3399123549461365},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.23198562860488892},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.187763512134552},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14219138026237488},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.12811625003814697},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.10532781481742859}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.655104398727417},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5933193564414978},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.519977867603302},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.47945988178253174},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.41462084650993347},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3399123549461365},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.23198562860488892},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.187763512134552},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14219138026237488},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.12811625003814697},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.10532781481742859},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/aicas59952.2024.10595857","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aicas59952.2024.10595857","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 6th International Conference on AI Circuits and Systems (AICAS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320337495","display_name":"Technology Development","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2153190325","https://openalex.org/W2965571038","https://openalex.org/W2970157301","https://openalex.org/W3047848469","https://openalex.org/W3094502228","https://openalex.org/W4206706211","https://openalex.org/W4281758439","https://openalex.org/W4293456264","https://openalex.org/W4300865759","https://openalex.org/W4308083513","https://openalex.org/W4385245566","https://openalex.org/W4385767699","https://openalex.org/W4389041654","https://openalex.org/W6694317859","https://openalex.org/W6767278793","https://openalex.org/W6780226713","https://openalex.org/W6784333009","https://openalex.org/W6838322825"],"related_works":["https://openalex.org/W2389214306","https://openalex.org/W4235240664","https://openalex.org/W2965083567","https://openalex.org/W1838576100","https://openalex.org/W2095886385","https://openalex.org/W2889616422","https://openalex.org/W2089704382","https://openalex.org/W1983399550","https://openalex.org/W97075385","https://openalex.org/W2393741509"],"abstract_inverted_index":{"Recently,":[0],"transformer":[1],"models":[2],"have":[3],"been":[4],"widely":[5],"deployed":[6],"for":[7,67,124],"AI":[8],"services":[9],"at":[10],"data":[11,52],"centers.":[12],"However,":[13],"one":[14],"of":[15,24,120],"the":[16,21,56,102,131],"noticeable":[17],"deployment":[18],"challenges":[19],"is":[20],"intensive":[22],"usage":[23],"vector":[25,69],"operations":[26,82],"such":[27],"as":[28],"layer":[29],"normalization":[30],"(LayerNorm)":[31],"and":[32,42,50,63,83,95,108,122,126,145],"Softmax":[33,125],"that":[34,113],"generally":[35],"show":[36,112],"sub-optimal":[37],"performance":[38],"on":[39],"general-purpose":[40],"CPU":[41],"GPU":[43],"due":[44],"to":[45,77],"their":[46],"low":[47],"arithmetic":[48],"intensities":[49],"long":[51],"dependency.":[53],"To":[54],"address":[55],"problem,":[57],"this":[58],"study":[59],"presents":[60],"a":[61,74,117],"low-latency":[62],"scalable":[64],"FPGA-based":[65],"engine":[66,90],"accelerating":[68],"operations.":[70,86],"Specifically,":[71],"we":[72],"built":[73],"dedicated":[75],"circuit":[76],"effectively":[78],"execute":[79],"both":[80],"element-wise":[81],"compound":[84],"fused":[85],"More":[87],"importantly,":[88],"our":[89,114],"can":[91],"calculate":[92],"input":[93],"mean":[94],"variance":[96],"in":[97,105],"parallel,":[98],"which":[99],"significantly":[100],"reduces":[101],"instruction":[103],"count":[104],"computing":[106],"LayerNorm":[107],"Softmax.":[109],"Experimental":[110],"results":[111],"design":[115],"achieves":[116],"latency":[118],"reduction":[119],"50%":[121],"40%":[123],"LayerNorm,":[127],"respectively,":[128],"compared":[129],"with":[130],"SOTA":[132],"design,":[133],"while":[134],"only":[135],"consuming":[136],"an":[137],"additional":[138],"20%":[139],"DSPs,":[140],"27%":[141],"BRAMs,":[142],"18%":[143],"FFs,":[144],"39%":[146],"LUTs.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
