{"id":"https://openalex.org/W4295856455","doi":"https://doi.org/10.1109/aicas54282.2022.9869924","title":"Enabling Energy-Efficient Inference for Self-Attention Mechanisms in Neural Networks","display_name":"Enabling Energy-Efficient Inference for Self-Attention Mechanisms in Neural Networks","publication_year":2022,"publication_date":"2022-06-13","ids":{"openalex":"https://openalex.org/W4295856455","doi":"https://doi.org/10.1109/aicas54282.2022.9869924"},"language":"en","primary_location":{"id":"doi:10.1109/aicas54282.2022.9869924","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aicas54282.2022.9869924","pdf_url":null,"source":{"id":"https://openalex.org/S4363608281","display_name":"2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041745327","display_name":"Qinyu Chen","orcid":"https://orcid.org/0009-0005-9480-6164"},"institutions":[{"id":"https://openalex.org/I148128674","display_name":"University of Shanghai for Science and Technology","ror":"https://ror.org/00ay9v204","country_code":"CN","type":"education","lineage":["https://openalex.org/I148128674"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qinyu Chen","raw_affiliation_strings":["Institute of Photonic Chips, University of Shanghai for Science and Technology,Shanghai,China","Institute of Photonic Chips, University of Shanghai for Science and Technology, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Institute of Photonic Chips, University of Shanghai for Science and Technology,Shanghai,China","institution_ids":["https://openalex.org/I148128674"]},{"raw_affiliation_string":"Institute of Photonic Chips, University of Shanghai for Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I148128674"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079049469","display_name":"Congyi Sun","orcid":"https://orcid.org/0000-0002-5826-9040"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Congyi Sun","raw_affiliation_strings":["School of Electronic Science and Engineering, Nanjing University,Nanjing,China","School of Electronic Science and Engineering, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University,Nanjing,China","institution_ids":["https://openalex.org/I881766915"]},{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072973899","display_name":"Zhonghai Lu","orcid":"https://orcid.org/0000-0003-0061-3475"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Zhonghai Lu","raw_affiliation_strings":["KTH-Royal Institute of Technology,Stockholm,Sweden","KTH-Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"KTH-Royal Institute of Technology,Stockholm,Sweden","institution_ids":["https://openalex.org/I86987016"]},{"raw_affiliation_string":"KTH-Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082967906","display_name":"Chang Gao","orcid":"https://orcid.org/0000-0002-3284-4078"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang Gao","raw_affiliation_strings":["Institute of Neuroinformatics, University of Z&#x00FC;rich and ETH Z&#x00FC;rich,Zurich,Switzerland"],"affiliations":[{"raw_affiliation_string":"Institute of Neuroinformatics, University of Z&#x00FC;rich and ETH Z&#x00FC;rich,Zurich,Switzerland","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5041745327"],"corresponding_institution_ids":["https://openalex.org/I148128674"],"apc_list":null,"apc_paid":null,"fwci":0.2987,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.63278921,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"25","last_page":"28"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8491166830062866},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6581166982650757},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6150094866752625},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6087923645973206},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.587690532207489},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5815302729606628},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.5637335777282715},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5599316954612732},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5411258935928345},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.4986748695373535},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4283805191516876},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.4225960969924927},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4221074879169464},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.40821710228919983},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3461587429046631},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.2918154299259186},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.2157939076423645},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10746672749519348}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8491166830062866},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6581166982650757},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6150094866752625},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6087923645973206},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.587690532207489},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5815302729606628},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.5637335777282715},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5599316954612732},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5411258935928345},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.4986748695373535},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4283805191516876},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.4225960969924927},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4221074879169464},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.40821710228919983},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3461587429046631},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2918154299259186},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2157939076423645},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10746672749519348},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/aicas54282.2022.9869924","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aicas54282.2022.9869924","pdf_url":null,"source":{"id":"https://openalex.org/S4363608281","display_name":"2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","raw_type":"proceedings-article"},{"id":"pmh:oai:www.zora.uzh.ch:231282","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401281","display_name":"Zurich Open Repository and Archive (University of Zurich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I202697423","host_organization_name":"University of Zurich","host_organization_lineage":["https://openalex.org/I202697423"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Chen, Qinyu; Sun, Congyi; Lu, Zhonghai; Gao, Chang  (2022). Enabling Energy-Efficient Inference for Self-Attention Mechanisms in Neural Networks.  In: 2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS), Incheon, Korea, Republic of, 13 June 2022 - 15 June 2022, IEEE.","raw_type":"Conference or Workshop Item"},{"id":"doi:10.5167/uzh-231282","is_oa":true,"landing_page_url":"https://doi.org/10.5167/uzh-231282","pdf_url":null,"source":{"id":"https://openalex.org/S7407051291","display_name":"Universit\u00e4t Z\u00fcrich, ZORA","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"pmh:oai:www.zora.uzh.ch:231282","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401281","display_name":"Zurich Open Repository and Archive (University of Zurich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I202697423","host_organization_name":"University of Zurich","host_organization_lineage":["https://openalex.org/I202697423"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Chen, Qinyu; Sun, Congyi; Lu, Zhonghai; Gao, Chang  (2022). Enabling Energy-Efficient Inference for Self-Attention Mechanisms in Neural Networks.  In: 2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS), Incheon, Korea, Republic of, 13 June 2022 - 15 June 2022, IEEE.","raw_type":"Conference or Workshop Item"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.8999999761581421}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2788838111","https://openalex.org/W2896457183","https://openalex.org/W2963846996","https://openalex.org/W2965373594","https://openalex.org/W3017024317","https://openalex.org/W3027324516","https://openalex.org/W3047848469","https://openalex.org/W3096609285","https://openalex.org/W3189877953","https://openalex.org/W3206196385","https://openalex.org/W4385245566","https://openalex.org/W6737236263","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6766673545"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2013643406","https://openalex.org/W2027972911","https://openalex.org/W2157978810","https://openalex.org/W4391547476","https://openalex.org/W2597809628"],"abstract_inverted_index":{"The":[0,64,145],"study":[1],"of":[2,98],"specialized":[3,135],"accelerators":[4,21],"tailored":[5],"for":[6,25,111],"neural":[7,19,27,32,51],"networks":[8,28,33],"is":[9,48,120,138,147,162],"becoming":[10],"a":[11,134,150],"promising":[12],"topic":[13],"in":[14,102],"recent":[15],"years.":[16],"Such":[17],"existing":[18],"network":[20,52],"are":[22],"usually":[23],"designed":[24,139],"convolutional":[26],"(CNNs)":[29],"or":[30],"recurrent":[31],"have":[34,70],"been":[35,41],"(RNNs),":[36],"however,":[37],"less":[38,167],"attention":[39,45],"has":[40,87],"paid":[42],"to":[43,57,122,140],"the":[44,55,59,84,96,125,143,155,159],"mechanisms,":[46],"which":[47,92],"an":[49,107],"emerging":[50],"primitive":[53],"with":[54,95,166],"ability":[56],"identify":[58],"relations":[60],"within":[61],"input":[62,99],"entities.":[63,100],"self-attention-oriented":[65],"models":[66],"such":[67],"as":[68,127,129],"Transformer":[69],"achieved":[71],"great":[72],"performance":[73],"on":[74,149],"natural":[75],"language":[76],"processing,":[77],"computer":[78],"vision":[79],"and":[80,132,154],"machine":[81],"translation.":[82],"However,":[83],"self-attention":[85,113,118],"mechanism":[86,119],"intrinsically":[88],"expensive":[89],"computational":[90],"workloads,":[91],"increase":[93,142],"quadratically":[94],"number":[97],"Therefore,":[101],"this":[103],"work,":[104],"we":[105],"propose":[106],"software-hardware":[108],"co-design":[109],"solution":[110],"energy-efficient":[112],"inference.":[114],"A":[115],"prediction-based":[116],"approximate":[117],"introduced":[121],"substantially":[123],"reduce":[124],"runtime":[126],"well":[128],"power":[130],"consumption,":[131],"then":[133],"hardware":[136],"architecture":[137],"further":[141],"speedup.":[144],"design":[146],"implemented":[148],"Xilinx":[151],"XC7Z035":[152],"FPGA,":[153],"results":[156],"show":[157],"that":[158],"energy":[160],"efficiency":[161],"improved":[163],"by":[164],"5.7x":[165],"than":[168],"1%":[169],"accuracy":[170],"loss.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-29T08:15:47.926485","created_date":"2025-10-10T00:00:00"}
