{"id":"https://openalex.org/W4391827186","doi":"https://doi.org/10.1109/tc.2024.3365942","title":"Accelerating Sparse DNNs Based on Tiled GEMM","display_name":"Accelerating Sparse DNNs Based on Tiled GEMM","publication_year":2024,"publication_date":"2024-02-14","ids":{"openalex":"https://openalex.org/W4391827186","doi":"https://doi.org/10.1109/tc.2024.3365942"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2024.3365942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2024.3365942","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101706991","display_name":"Cong Guo","orcid":"https://orcid.org/0000-0002-4479-5525"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Cong Guo","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-4479-5525","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102007698","display_name":"Fengchen Xue","orcid":"https://orcid.org/0009-0001-6080-5703"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fengchen Xue","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0001-6080-5703","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003939279","display_name":"Jingwen Leng","orcid":"https://orcid.org/0000-0002-5660-5493"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingwen Leng","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-5660-5493","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037125614","display_name":"Yuxian Qiu","orcid":"https://orcid.org/0000-0003-4040-0159"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuxian Qiu","raw_affiliation_strings":["NVIDIA, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-4040-0159","affiliations":[{"raw_affiliation_string":"NVIDIA, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091217469","display_name":"Yue Guan","orcid":"https://orcid.org/0009-0005-7433-2627"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Guan","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0005-7433-2627","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008837660","display_name":"Weihao Cui","orcid":"https://orcid.org/0000-0002-6646-5260"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihao Cui","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-6646-5260","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377840","display_name":"Quan Chen","orcid":"https://orcid.org/0000-0001-5832-0347"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Chen","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-5832-0347","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039318240","display_name":"Minyi Guo","orcid":"https://orcid.org/0000-0003-0034-2302"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minyi Guo","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-0034-2302","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101706991"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":3.5901,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.93855852,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"73","issue":"5","first_page":"1275","last_page":"1289"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11609","display_name":"Geophysical Methods and Applications","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6999264359474182},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5456791520118713},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.44447457790374756},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.39547520875930786},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.10926124453544617}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6999264359474182},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5456791520118713},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.44447457790374756},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.39547520875930786},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.10926124453544617},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2024.3365942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2024.3365942","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.41999998688697815}],"awards":[{"id":"https://openalex.org/G3940808221","display_name":null,"funder_award_id":"62222210","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4761656129","display_name":null,"funder_award_id":"2021ZD0110104","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8806450724","display_name":null,"funder_award_id":"U21B2017","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8826274250","display_name":null,"funder_award_id":"62072297","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W2101105183","https://openalex.org/W2194775991","https://openalex.org/W2606722458","https://openalex.org/W2618530766","https://openalex.org/W2707890836","https://openalex.org/W2765315405","https://openalex.org/W2767785892","https://openalex.org/W2896457183","https://openalex.org/W2899244816","https://openalex.org/W2923014074","https://openalex.org/W2954698171","https://openalex.org/W2963324947","https://openalex.org/W2963748441","https://openalex.org/W2965862774","https://openalex.org/W2980186997","https://openalex.org/W3016542674","https://openalex.org/W3017746288","https://openalex.org/W3092319711","https://openalex.org/W3132616766","https://openalex.org/W3187908937","https://openalex.org/W3205706264","https://openalex.org/W3210432446","https://openalex.org/W3211878177","https://openalex.org/W4221162983","https://openalex.org/W4293023328","https://openalex.org/W4302296459","https://openalex.org/W4308083739","https://openalex.org/W4366341968","https://openalex.org/W4384705353","https://openalex.org/W4385245566","https://openalex.org/W6631660994","https://openalex.org/W6637151318","https://openalex.org/W6637373629","https://openalex.org/W6638783484","https://openalex.org/W6725543821","https://openalex.org/W6739917289","https://openalex.org/W6746331415","https://openalex.org/W6755207826","https://openalex.org/W6766978945","https://openalex.org/W6776320331","https://openalex.org/W6776767859","https://openalex.org/W6810610777"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W3202552726","https://openalex.org/W2279642117"],"abstract_inverted_index":{"Network":[0],"pruning":[1,77,122],"can":[2,158],"reduce":[3],"the":[4,58,68,82,89,101,112,124,133,139,144,149,153,177,183,186,197],"computation":[5],"cost":[6],"of":[7,190],"deep":[8],"neural":[9],"network":[10],"(DNN)":[11],"models.":[12,180],"However,":[13],"sparse":[14,29,63,154,178],"models":[15,30],"often":[16],"produce":[17],"randomly-distributed":[18],"weights":[19],"to":[20,24,127,169],"maintain":[21,128],"accuracy,":[22],"leading":[23],"irregular":[25,121],"computations.":[26,42],"Consequently,":[27],"unstructured":[28,203],"cannot":[31],"achieve":[32],"meaningful":[33],"speedup":[34],"on":[35,185],"commodity":[36],"hardware":[37],"built":[38],"for":[39,53,96,115,120],"dense":[40,198],"matrix":[41,85,91],"Accelerators":[43],"are":[44],"usually":[45],"modified":[46],"or":[47],"designed":[48],"with":[49],"structured":[50,108],"sparsity-optimized":[51],"architectures":[52],"exploiting":[54],"sparsity.":[55,204],"For":[56],"example,":[57],"Ampere":[59],"architecture":[60],"introduces":[61],"a":[62,76,107,164],"tensor":[64,155],"core,":[65],"which":[66,105],"adopts":[67],"2:4":[69,145],"sparsity":[70,103,109,135,146,167,173],"pattern.":[71],"<p":[72],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[73],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">We":[74],"propose":[75],"method":[78],"that":[79,84],"builds":[80],"upon":[81],"insight":[83],"multiplication":[86],"generally":[87],"breaks":[88],"large":[90],"into":[92,163],"multiple":[93],"smaller":[94],"tiles":[95],"parallel":[97],"execution.":[98],"We":[99,157,181],"present":[100],"\u201ctile-wise\u201d":[102],"pattern,":[104],"maintains":[106],"pattern":[110,168],"at":[111,123,138,148],"tile":[113],"level":[114,151],"efficient":[116],"execution":[117],"but":[118],"allows":[119],"global":[125,140],"scale":[126],"high":[129],"accuracy.":[130],"In":[131],"addition,":[132],"tile-wise":[134],"is":[136],"implemented":[137],"memory":[141],"level,":[142],"and":[143,174,193,202],"executes":[147],"register":[150],"inside":[152],"core.":[156],"combine":[159],"these":[160],"two":[161],"patterns":[162],"\u201ctile-vector-wise\u201d":[165],"(TVW)":[166],"explore":[170],"more":[171],"fine-grained":[172],"further":[175],"accelerate":[176],"DNN":[179],"evaluate":[182],"TVW":[184],"GPU,":[187],"achieving":[188],"averages":[189],"1:85\u00d7,":[191],"2:75\u00d7,":[192],"22:18\u00d7":[194],"speedups":[195],"over":[196],"model,":[199],"block":[200],"sparsity,":[201]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-23T08:51:43.019350","created_date":"2025-10-10T00:00:00"}
