{"id":"https://openalex.org/W4281800613","doi":"https://doi.org/10.1145/3470496.3527419","title":"Cascading structured pruning","display_name":"Cascading structured pruning","publication_year":2022,"publication_date":"2022-05-31","ids":{"openalex":"https://openalex.org/W4281800613","doi":"https://doi.org/10.1145/3470496.3527419"},"language":"en","primary_location":{"id":"doi:10.1145/3470496.3527419","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3470496.3527419","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 49th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074284629","display_name":"Edward Hanson","orcid":"https://orcid.org/0000-0001-5179-8401"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Edward Hanson","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100413190","display_name":"Shiyu Li","orcid":"https://orcid.org/0000-0002-1990-7150"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shiyu Li","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100429403","display_name":"Hai Li","orcid":"https://orcid.org/0000-0003-3228-6544"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hai 'Helen' Li","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058073627","display_name":"Yiran Chen","orcid":"https://orcid.org/0000-0002-1486-8412"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiran Chen","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5074284629"],"corresponding_institution_ids":["https://openalex.org/I170897317"],"apc_list":null,"apc_paid":null,"fwci":2.5522,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.908649,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"522","last_page":"535"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.8675427436828613},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8287670612335205},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7310643792152405},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.7220460772514343},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6790086627006531},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.5991894602775574},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5224032402038574},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5112996101379395},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.4780562222003937},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.4203195571899414},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.412800133228302},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.404599130153656},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.13350147008895874}],"concepts":[{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.8675427436828613},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8287670612335205},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7310643792152405},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.7220460772514343},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6790086627006531},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.5991894602775574},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5224032402038574},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5112996101379395},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.4780562222003937},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.4203195571899414},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.412800133228302},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.404599130153656},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.13350147008895874},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3470496.3527419","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3470496.3527419","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 49th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.9100000262260437}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W2163605009","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2604319603","https://openalex.org/W2736953746","https://openalex.org/W2751366252","https://openalex.org/W2798275680","https://openalex.org/W2808168148","https://openalex.org/W2900228909","https://openalex.org/W2900327659","https://openalex.org/W2904607988","https://openalex.org/W2904902077","https://openalex.org/W2928560789","https://openalex.org/W2940862705","https://openalex.org/W2944355599","https://openalex.org/W2949870694","https://openalex.org/W2950656546","https://openalex.org/W2952682304","https://openalex.org/W2962818002","https://openalex.org/W2979310060","https://openalex.org/W2979439447","https://openalex.org/W2980200167","https://openalex.org/W3023151209","https://openalex.org/W3092182048","https://openalex.org/W3104393472","https://openalex.org/W3118608800","https://openalex.org/W3206453033","https://openalex.org/W4251575795","https://openalex.org/W6615861906","https://openalex.org/W6676297131","https://openalex.org/W6687483927","https://openalex.org/W6725543821","https://openalex.org/W6726275242","https://openalex.org/W6731500867","https://openalex.org/W6739901393","https://openalex.org/W6756118020","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W2293118914","https://openalex.org/W2998381397","https://openalex.org/W4236419692","https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W3167919718","https://openalex.org/W4251718783","https://openalex.org/W2171015181","https://openalex.org/W2187181201","https://openalex.org/W4384937382"],"abstract_inverted_index":{"Performance":[0],"and":[1,32,49,59,71,130,136,178],"efficiency":[2,94,210],"of":[3,127],"running":[4],"modern":[5],"Deep":[6],"Neural":[7],"Networks":[8],"(DNNs)":[9],"are":[10],"heavily":[11],"bounded":[12],"by":[13],"data":[14,19,61,68,88,129,156],"movement.":[15],"To":[16],"mitigate":[17],"the":[18,109,161,185],"movement":[20],"bottlenecks,":[21],"recent":[22,101],"DNN":[23],"inference":[24],"accelerator":[25],"designs":[26],"widely":[27],"adopt":[28],"aggressive":[29],"compression":[30,126],"techniques":[31],"sparse-skipping":[33,53,181],"mechanisms.":[34],"These":[35],"mechanisms":[36],"avoid":[37],"transferring":[38],"or":[39,44,216],"computing":[40],"with":[41,148,187,214],"zero-valued":[42],"weights":[43],"activations":[45],"to":[46,100,133,152,183],"save":[47],"time":[48],"energy.":[50],"However,":[51],"such":[52,104],"logic":[54],"involves":[55],"large":[56,164],"input":[57,165],"buffers":[58],"irregular":[60],"access":[62,132,153],"patterns,":[63],"thus":[64,159],"precluding":[65],"many":[66],"energy-efficient":[67],"reuse":[69,89],"opportunities":[70,90],"dataflows.":[72],"In":[73],"this":[74],"work,":[75],"we":[76],"propose":[77],"Cascading":[78],"Structured":[79],"Pruning":[80],"(CSP),":[81],"a":[82,118,149,173,179],"technique":[83],"that":[84,122,203],"preserves":[85],"significantly":[86],"more":[87],"for":[91,124,163],"higher":[92],"energy":[93,209],"while":[95],"maintaining":[96],"comparable":[97,215],"performance":[98],"relative":[99],"sparse":[102],"architectures":[103],"as":[105],"SparTen.":[106],"CSP":[107,204],"includes":[108],"following":[110],"two":[111],"components:":[112],"At":[113,139],"algorithm":[114],"level,":[115,141],"CSP-A":[116],"induces":[117],"predictable":[119],"sparsity":[120,146],"pattern":[121,147],"allows":[123],"low-overhead":[125],"weight":[128,137],"sequential":[131],"both":[134],"activation":[135,155],"data.":[138],"architecture":[140],"CSP-H":[142,168],"leverages":[143],"CSP-A's":[144],"induced":[145],"novel":[150,174],"dataflow":[151,186],"unique":[154],"only":[157],"once,":[158],"removing":[160],"demand":[162],"buffers.":[166],"Each":[167],"processing":[169],"element":[170],"(PE)":[171],"employs":[172],"accumulation":[175],"buffer":[176],"design":[177],"counter-based":[180],"mechanism":[182],"support":[184],"minimum":[188],"controller":[189],"overhead.":[190],"We":[191],"verify":[192],"our":[193],"approach":[194],"on":[195,206],"several":[196],"representative":[197],"models.":[198],"Our":[199],"simulated":[200],"results":[201],"show":[202],"achieves":[205],"average":[207],"15\u00d7":[208],"improvement":[211],"over":[212],"SparTen":[213],"superior":[217],"speedup":[218],"under":[219],"most":[220],"evaluations.":[221]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
