{"id":"https://openalex.org/W4389476299","doi":"https://doi.org/10.1145/3613424.3614312","title":"Eureka: Efficient Tensor Cores for One-sided Unstructured Sparsity in DNN Inference","display_name":"Eureka: Efficient Tensor Cores for One-sided Unstructured Sparsity in DNN Inference","publication_year":2023,"publication_date":"2023-10-28","ids":{"openalex":"https://openalex.org/W4389476299","doi":"https://doi.org/10.1145/3613424.3614312"},"language":"en","primary_location":{"id":"doi:10.1145/3613424.3614312","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3613424.3614312","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3613424.3614312","source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"56th Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3613424.3614312","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001458388","display_name":"Ashish Gondimalla","orcid":"https://orcid.org/0000-0002-3370-3576"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ashish Gondimalla","raw_affiliation_strings":["Google, USA"],"raw_orcid":"https://orcid.org/0000-0002-3370-3576","affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069139257","display_name":"Mithuna Thottethodi","orcid":"https://orcid.org/0000-0003-4164-4542"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mithuna Thottethodi","raw_affiliation_strings":["Purdue University, United States of America"],"raw_orcid":"https://orcid.org/0000-0003-4164-4542","affiliations":[{"raw_affiliation_string":"Purdue University, United States of America","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103145581","display_name":"T. N. Vijaykumar","orcid":"https://orcid.org/0000-0001-6624-4372"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"T. N. Vijaykumar","raw_affiliation_strings":["Purdue University, United States of America"],"raw_orcid":"https://orcid.org/0000-0001-6624-4372","affiliations":[{"raw_affiliation_string":"Purdue University, United States of America","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5001458388"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":3.1159,"has_fulltext":true,"cited_by_count":15,"citation_normalized_percentile":{"value":0.93001186,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"324","last_page":"337"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11304","display_name":"Advanced Neuroimaging Techniques and Applications","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.9771999716758728,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.784957230091095},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.692264199256897},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6489861607551575},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3752822279930115},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19603502750396729},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.06363877654075623}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.784957230091095},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.692264199256897},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6489861607551575},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3752822279930115},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19603502750396729},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.06363877654075623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3613424.3614312","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3613424.3614312","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3613424.3614312","source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"56th Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3613424.3614312","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3613424.3614312","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3613424.3614312","source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"56th Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309036","display_name":"Purdue University","ror":"https://ror.org/02dqehb95"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4389476299.pdf","grobid_xml":"https://content.openalex.org/works/W4389476299.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2183341477","https://openalex.org/W2285660444","https://openalex.org/W2516141709","https://openalex.org/W2541839172","https://openalex.org/W2588191434","https://openalex.org/W2606722458","https://openalex.org/W2625457103","https://openalex.org/W2794952988","https://openalex.org/W2900228909","https://openalex.org/W2904902077","https://openalex.org/W2931118404","https://openalex.org/W2949870694","https://openalex.org/W2963748441","https://openalex.org/W2963989532","https://openalex.org/W2979439447","https://openalex.org/W2980186997","https://openalex.org/W3012178976","https://openalex.org/W3016832937","https://openalex.org/W3024621361","https://openalex.org/W3043303806","https://openalex.org/W3102510044","https://openalex.org/W3103168911","https://openalex.org/W3158831985","https://openalex.org/W3185702163","https://openalex.org/W3187908937","https://openalex.org/W3190062760","https://openalex.org/W4240168186"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W4402327032","https://openalex.org/W2382290278"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"(DNNs),":[3],"while":[4,110,188,226],"enormously":[5],"popular,":[6],"continue":[7],"to":[8,92,133,147,217,234,268,279],"place":[9],"ever":[10],"higher":[11],"compute":[12,27],"demand":[13,28],"for":[14,73,159,243,251,254],"which":[15,49,70],"GPUs":[16],"provide":[17,247],"specialized":[18],"matrix":[19],"multipliers":[20],"called":[21],"tensor":[22,33,62,150,157,263],"cores.":[23],"To":[24,142,257],"reduce":[25],"the":[26,40,60,148,186,222,227,231,262,269,282,292],"via":[29,166],"sparsity,":[30,273],"Nvidia":[31],"Ampere\u2019s":[32],"cores":[34,63],"support":[35],"2:4":[36,88,315],"structured":[37],"sparsity":[38,53,89,105,118],"in":[39,95,137,210,221,230,261],"filters":[41,284],"(i.e.,":[42],"two":[43],"non-zeros":[44],"out":[45],"of":[46,101,139,178,185,271,294,326],"four":[47],"values)":[48],"provides":[50,106],"uniform":[51],"50%":[52],"without":[54,99],"any":[55],"load":[56,112,140,164,182],"imbalance":[57,165],"issues.":[58],"Consequently,":[59],"sparse":[61,283,316],"maintain":[64],"(input":[65],"or":[66,214],"output)":[67],"operand":[68,135],"stationarity,":[69],"is":[71,90,173,239],"fundamental":[72],"avoiding":[74],"high-overhead":[75],"hardware,":[76],"requiring":[77],"only":[78,190],"one":[79],"extra":[80],"4-1":[81],"multiplexer":[82],"per":[83],"multiply-accumulate":[84],"unit":[85],"(MAC).":[86],"However,":[87],"limited":[91],"2x":[93],"improvements":[94],"performance":[96],"and":[97,128,303,306,308,314,320,323,328],"energy":[98,310],"loss":[100],"accuracy,":[102],"whereas":[103],"unstructured":[104,117,160,272],"5-6x":[107],"opportunity":[108],"albeit":[109],"causing":[111],"imbalance.":[113],"Previous":[114],"papers":[115],"on":[116,291],"incur":[119],"high":[120,145],"hardware":[121,193],"overhead":[122],"(e.g.,":[123],"buffering,":[124],"crossbars,":[125],"scatter-gather":[126],"networks,":[127],"address":[129],"calculators)":[130],"mainly":[131],"due":[132,267],"sacrificing":[134],"stationarity":[136,180],"favor":[138],"balance.":[141],"avoid":[143],"adding":[144],"overheads":[146,325],"highly-efficient":[149],"cores,":[151],"we":[152,196,274],"propose":[153,197,275],"Eureka,":[154],"an":[155,240,248],"efficient":[156],"core":[158],"sparsity.":[161],"Eureka":[162,300],"addresses":[163],"three":[167],"contributions:":[168],"(1)":[169],"Our":[170,296],"key":[171],"insight":[172],"that":[174,299],"a":[175,191,203,218],"slight":[176],"weakening":[177],"output":[179,236],"achieves":[181,301],"balance":[183],"most":[184],"time":[187],"incurring":[189],"modest":[192],"overhead.":[194],"Accordingly,":[195],"single-step":[198],"uni-directional":[199],"displacement":[200],"(SUDS),":[201],"where":[202],"filter":[204],"element\u2019s":[205],"multiplication":[206],"can":[207],"either":[208],"occur":[209],"its":[211],"original":[212,232],"position":[213],"be":[215],"displaced":[216],"vacant":[219],"MAC":[220],"adjacent":[223],"row":[224,233],"below":[225],"accumulation":[228],"occurs":[229],"restore":[235],"stationarity.":[237],"SUDS":[238],"offline":[241,276],"technique":[242],"inference.":[244],"(2)":[245],"We":[246],"optimal":[249],"algorithm":[250],"work":[252],"assignment":[253],"SUDS.":[255],"(3)":[256],"achieve":[258],"fewer":[259],"bubbles":[260],"core\u2019s":[264],"systolic":[265,277],"pipeline":[266],"irregularity":[270],"scheduling":[278],"group":[280],"together":[281],"with":[285],"similar,":[286],"statically-known":[287],"execution":[288],"times":[289],"(based":[290],"number":[293],"non-zeros).":[295],"evaluation":[297],"shows":[298],"4.8x":[302],"2.4x":[304],"speedups,":[305],"3.1x":[307],"1.8x":[309],"reductions":[311],"over":[312,331],"dense":[313],"(Ampere)":[317],"implementations,":[318],"respectively,":[319,330],"incurs":[321],"area":[322],"power":[324],"6%":[327],"11.5%,":[329],"Ampere.":[332]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
