{"id":"https://openalex.org/W4386763875","doi":"https://doi.org/10.1109/dac56929.2023.10247993","title":"Efficient Transformer Inference with Statically Structured Sparse Attention","display_name":"Efficient Transformer Inference with Statically Structured Sparse Attention","publication_year":2023,"publication_date":"2023-07-09","ids":{"openalex":"https://openalex.org/W4386763875","doi":"https://doi.org/10.1109/dac56929.2023.10247993"},"language":"en","primary_location":{"id":"doi:10.1109/dac56929.2023.10247993","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac56929.2023.10247993","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 60th ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030894829","display_name":"Steve Dai","orcid":"https://orcid.org/0000-0002-5045-1964"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Steve Dai","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015017700","display_name":"Hasan Genc","orcid":"https://orcid.org/0000-0001-8596-6135"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hasan Genc","raw_affiliation_strings":["University of California,Berkeley","University of California, Berkeley"],"affiliations":[{"raw_affiliation_string":"University of California,Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045219356","display_name":"Rangharajan Venkatesan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rangharajan Venkatesan","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010156116","display_name":"Brucek Khailany","orcid":"https://orcid.org/0000-0002-7584-3489"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brucek Khailany","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5030894829"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9752,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.78005332,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7802286148071289},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7390367388725281},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6925922632217407},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6631026268005371},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6448041200637817},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5428982973098755},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4536267817020416},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.42030075192451477},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41752704977989197},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3696678876876831},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3418470025062561},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32198405265808105},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.08734104037284851}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7802286148071289},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7390367388725281},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6925922632217407},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6631026268005371},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6448041200637817},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5428982973098755},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4536267817020416},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.42030075192451477},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41752704977989197},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3696678876876831},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3418470025062561},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32198405265808105},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.08734104037284851},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac56929.2023.10247993","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac56929.2023.10247993","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 60th ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.9200000166893005}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W2896457183","https://openalex.org/W2923014074","https://openalex.org/W2945260553","https://openalex.org/W2949591530","https://openalex.org/W2963748441","https://openalex.org/W2997929983","https://openalex.org/W3015468748","https://openalex.org/W3017746288","https://openalex.org/W3033529678","https://openalex.org/W3094502228","https://openalex.org/W3096609285","https://openalex.org/W3103682594","https://openalex.org/W3106298483","https://openalex.org/W3129415623","https://openalex.org/W3159727696","https://openalex.org/W3205531882","https://openalex.org/W4211185538","https://openalex.org/W4287704453","https://openalex.org/W4298422451","https://openalex.org/W4323654151","https://openalex.org/W4385245566","https://openalex.org/W6727099177","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6761628794","https://openalex.org/W6762122294","https://openalex.org/W6763509872","https://openalex.org/W6770018571","https://openalex.org/W6776048684","https://openalex.org/W6776320331","https://openalex.org/W6778485988","https://openalex.org/W6779163297","https://openalex.org/W6781533629","https://openalex.org/W6784333009","https://openalex.org/W6785783668","https://openalex.org/W6790889065","https://openalex.org/W6802560688"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W1590307681","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W2353836703","https://openalex.org/W41015297"],"abstract_inverted_index":{"Self-attention":[0],"matrices":[1,47],"of":[2,12,32],"Transformers":[3],"are":[4],"often":[5],"highly":[6],"sparse":[7,41],"because":[8],"the":[9,25,29,64],"relevant":[10],"context":[11],"each":[13],"token":[14],"is":[15],"typically":[16],"limited":[17],"to":[18,74,92,100],"just":[19],"a":[20,86,101],"few":[21],"other":[22],"tokens":[23],"in":[24,108],"sequence.":[26],"To":[27,62],"reduce":[28],"computational":[30],"burden":[31],"self-attention":[33],"on":[34],"Transformer":[35],"inference,":[36],"we":[37,68,84,104],"propose":[38],"static,":[39],"structured,":[40],"attention":[42,46,77],"masks":[43],"that":[44],"split":[45],"into":[48],"dense":[49,88,102],"regions,":[50],"skipping":[51],"computations":[52,58],"outside":[53],"these":[54,60],"regions":[55],"while":[56,79],"reducing":[57],"inside":[59],"regions.":[61],"support":[63],"proposed":[65],"mask":[66],"structure,":[67],"design":[69],"an":[70],"entropy-aware":[71],"finetuning":[72],"algorithm":[73],"naturally":[75],"encourage":[76],"sparsity":[78,97],"maximizing":[80],"task":[81],"accuracy.":[82],"Furthermore,":[83],"extend":[85],"typical":[87],"deep":[89],"learning":[90],"accelerator":[91],"efficiently":[93],"exploit":[94],"our":[95],"structured":[96],"pattern.":[98],"Compared":[99],"baseline,":[103],"achieve":[105],"56.6%":[106],"reduction":[107],"energy":[109],"consumption,":[110],"58.9%":[111],"performance":[112],"improvement":[113],"with":[114],"<1%":[115],"accuracy":[116],"loss":[117],"and":[118],"2.6%":[119],"area":[120],"overhead.":[121]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4}],"updated_date":"2026-03-17T09:09:15.849793","created_date":"2025-10-10T00:00:00"}
