{"id":"https://openalex.org/W3091174280","doi":"https://doi.org/10.1145/3410463.3414648","title":"Accelerating Sparse CNN Inference on GPUs with Performance-Aware Weight Pruning","display_name":"Accelerating Sparse CNN Inference on GPUs with Performance-Aware Weight Pruning","publication_year":2020,"publication_date":"2020-09-30","ids":{"openalex":"https://openalex.org/W3091174280","doi":"https://doi.org/10.1145/3410463.3414648","mag":"3091174280"},"language":"en","primary_location":{"id":"doi:10.1145/3410463.3414648","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3410463.3414648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071522065","display_name":"Masuma Akter Rumi","orcid":null},"institutions":[{"id":"https://openalex.org/I126307644","display_name":"University of Iowa","ror":"https://ror.org/036jqmy94","country_code":"US","type":"education","lineage":["https://openalex.org/I126307644"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Masuma Akter Rumi","raw_affiliation_strings":["The University of Iowa, Iowa City, IA, USA"],"affiliations":[{"raw_affiliation_string":"The University of Iowa, Iowa City, IA, USA","institution_ids":["https://openalex.org/I126307644"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016070401","display_name":"Xiaolong Ma","orcid":"https://orcid.org/0000-0003-3753-7648"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaolong Ma","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100651384","display_name":"Yanzhi Wang","orcid":"https://orcid.org/0000-0002-3024-7990"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanzhi Wang","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070002691","display_name":"Peng Jiang","orcid":"https://orcid.org/0000-0001-7743-6062"},"institutions":[{"id":"https://openalex.org/I126307644","display_name":"University of Iowa","ror":"https://ror.org/036jqmy94","country_code":"US","type":"education","lineage":["https://openalex.org/I126307644"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peng Jiang","raw_affiliation_strings":["The University of Iowa, Iowa City, IA, USA"],"affiliations":[{"raw_affiliation_string":"The University of Iowa, Iowa City, IA, USA","institution_ids":["https://openalex.org/I126307644"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5071522065"],"corresponding_institution_ids":["https://openalex.org/I126307644"],"apc_list":null,"apc_paid":null,"fwci":2.2752,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.90206962,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"267","last_page":"278"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.8353114724159241},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7723730802536011},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.6525020003318787},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6511191725730896},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6215671300888062},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6117151975631714},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5854536890983582},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5531139969825745},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5069308876991272},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.466470330953598},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.4525226950645447},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.450514018535614},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.4111713767051697},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33881956338882446},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2633628845214844}],"concepts":[{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.8353114724159241},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7723730802536011},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.6525020003318787},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6511191725730896},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6215671300888062},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6117151975631714},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5854536890983582},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5531139969825745},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5069308876991272},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.466470330953598},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.4525226950645447},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.450514018535614},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.4111713767051697},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33881956338882446},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2633628845214844},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3410463.3414648","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3410463.3414648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1789336918","https://openalex.org/W1985312666","https://openalex.org/W2097117768","https://openalex.org/W2108598243","https://openalex.org/W2155893237","https://openalex.org/W2271840356","https://openalex.org/W2530879419","https://openalex.org/W2552839021","https://openalex.org/W2617031569","https://openalex.org/W2783437385","https://openalex.org/W2785418329","https://openalex.org/W2798170643","https://openalex.org/W2798508334","https://openalex.org/W2914304175","https://openalex.org/W2946046356","https://openalex.org/W2962818002","https://openalex.org/W2963363373","https://openalex.org/W2964337156","https://openalex.org/W2979429416","https://openalex.org/W2980186997","https://openalex.org/W2996874060","https://openalex.org/W2999347085","https://openalex.org/W3104263540","https://openalex.org/W3104849992"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W4382618745","https://openalex.org/W2885125400","https://openalex.org/W2982536526","https://openalex.org/W2995343971","https://openalex.org/W3092292339","https://openalex.org/W2992221004","https://openalex.org/W2891818448"],"abstract_inverted_index":{"Weight":[0],"pruning":[1,28],"is":[2,91,114],"a":[3,104,153],"popular":[4],"technique":[5],"to":[6,33,39,53,79,115,135,167],"reduce":[7],"the":[8,14,24,34,40,44,55,64,69,87,94,100,122,159,164,170,188,198],"size":[9],"and":[10,83,126],"computation":[11],"complexity":[12],"of":[13,57,108,119,191,201],"Convolutional":[15],"Neural":[16],"Networks":[17],"(CNNs).":[18],"Despite":[19],"its":[20],"success":[21],"in":[22,43,121,163],"reducing":[23],"model":[25],"size,":[26],"weight":[27],"has":[29],"brought":[30],"limited":[31],"benefit":[32],"CNN":[35,179,202],"inference":[36],"performance,":[37],"due":[38],"irregularity":[41],"introduced":[42],"sparse":[45,58,75,81,109,123,165,192],"convolution":[46,124,193],"operations.":[47],"In":[48],"this":[49],"work,":[50],"we":[51,84,102,151],"aim":[52],"improve":[54,169,187],"performance":[56,71,89,190,200],"convolutions":[59],"on":[60,99],"GPUs":[61],"by":[62,93],"mitigating":[63],"irregularity.":[65],"We":[66],"find":[67],"that":[68,86,157,182],"existing":[70],"optimization":[72],"techniques":[73,184],"for":[74,131],"matrix":[76],"computations":[77],"fail":[78],"accelerate":[80],"convolutions,":[82],"observe":[85],"main":[88,112],"bottleneck":[90],"caused":[92],"heavy":[95],"control-flow":[96],"instructions.":[97],"Based":[98],"observation,":[101],"proposed":[103],"new":[105],"GEMM-based":[106],"implementation":[107],"convolutions.":[110],"Our":[111],"idea":[113],"extract":[116],"dense":[117,128,133,149],"blocks":[118,134],"non-zeros":[120],"kernels,":[125],"use":[127],"matrix-matrix":[129],"multiplication":[130],"these":[132],"achieve":[136],"high":[137],"throughput.":[138,171],"For":[139],"cases":[140],"where":[141],"many":[142],"non-zero":[143],"weights":[144,162],"cannot":[145],"be":[146],"grouped":[147],"into":[148],"blocks,":[150],"propose":[152],"performance-aware":[154],"re-pruning":[155],"strategy":[156],"removes":[158],"least":[160],"important":[161],"kernels":[166],"further":[168],"The":[172],"experimental":[173],"results":[174],"with":[175],"five":[176],"real-world":[177],"pruned":[178],"models":[180],"show":[181],"our":[183],"can":[185],"significantly":[186],"layer-wise":[189],"operations":[194],"as":[195,197],"well":[196],"end-to-end":[199],"inference.":[203]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
