{"id":"https://openalex.org/W4415250777","doi":"https://doi.org/10.1109/hpec67600.2025.11196476","title":"Accelerating Sparse Deep Learning via Multi-Layer Tensor Reordering and Partitioning","display_name":"Accelerating Sparse Deep Learning via Multi-Layer Tensor Reordering and Partitioning","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4415250777","doi":"https://doi.org/10.1109/hpec67600.2025.11196476"},"language":"en","primary_location":{"id":"doi:10.1109/hpec67600.2025.11196476","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196476","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088217473","display_name":"G\u00fcnd\u00fcz Vehbi Demirci","orcid":"https://orcid.org/0000-0001-9831-7062"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gunduz Vehbi Demirci","raw_affiliation_strings":["Wayve,London,UK"],"affiliations":[{"raw_affiliation_string":"Wayve,London,UK","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091534477","display_name":"\u00c7a\u011fatay Dikici","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cagatay Dikici","raw_affiliation_strings":["Wayve,London,UK"],"affiliations":[{"raw_affiliation_string":"Wayve,London,UK","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108262839","display_name":"Tim Atherton","orcid":null},"institutions":[{"id":"https://openalex.org/I4210133271","display_name":"Imagination Technologies (United Kingdom)","ror":"https://ror.org/03rf62a76","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210133271"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tim Atherton","raw_affiliation_strings":["Imagination Technologies,London,UK"],"affiliations":[{"raw_affiliation_string":"Imagination Technologies,London,UK","institution_ids":["https://openalex.org/I4210133271"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5088217473"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29816914,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6872000098228455},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6395000219345093},{"id":"https://openalex.org/keywords/diagonal","display_name":"Diagonal","score":0.5569000244140625},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5175999999046326},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4449999928474426},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.42829999327659607},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.40720000863075256},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.3849000036716461},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.36800000071525574}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7597000002861023},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6872000098228455},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6395000219345093},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6133999824523926},{"id":"https://openalex.org/C130367717","wikidata":"https://www.wikidata.org/wiki/Q189791","display_name":"Diagonal","level":2,"score":0.5569000244140625},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5175999999046326},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4449999928474426},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.43130001425743103},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.42829999327659607},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.40720000863075256},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.3849000036716461},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.38269999623298645},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3797999918460846},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.36800000071525574},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.3668000102043152},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3637000024318695},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.33009999990463257},{"id":"https://openalex.org/C2781221856","wikidata":"https://www.wikidata.org/wiki/Q840247","display_name":"Hypergraph","level":2,"score":0.3192000091075897},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3149999976158142},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.2946999967098236},{"id":"https://openalex.org/C85817219","wikidata":"https://www.wikidata.org/wiki/Q884772","display_name":"Block matrix","level":3,"score":0.28110000491142273},{"id":"https://openalex.org/C13670688","wikidata":"https://www.wikidata.org/wiki/Q3500548","display_name":"Space partitioning","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C182873914","wikidata":"https://www.wikidata.org/wiki/Q5157329","display_name":"Computational lithography","level":5,"score":0.27129998803138733},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.26510000228881836},{"id":"https://openalex.org/C2779097318","wikidata":"https://www.wikidata.org/wiki/Q2993446","display_name":"Connectomics","level":4,"score":0.2630999982357025},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec67600.2025.11196476","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196476","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W756028849","https://openalex.org/W1935978687","https://openalex.org/W2585720638","https://openalex.org/W2897529137","https://openalex.org/W2963125977","https://openalex.org/W2963182550","https://openalex.org/W2964537638","https://openalex.org/W2972228457","https://openalex.org/W2973134322","https://openalex.org/W3046974096","https://openalex.org/W3115410382","https://openalex.org/W3159706364","https://openalex.org/W3184606595","https://openalex.org/W4200090124","https://openalex.org/W4226435553","https://openalex.org/W4233258545"],"related_works":[],"abstract_inverted_index":{"Sparse":[0],"deep":[1],"learning":[2],"models":[3,129],"reduce":[4],"memory":[5,135],"and":[6,17,34,52,62,95,112,127,137,149,154],"computational":[7],"costs":[8],"by":[9],"eliminating":[10,120],"redundant":[11],"parameters;":[12],"however,":[13],"many":[14],"inference":[15,138,150],"engines":[16],"hardware":[18,142],"platforms":[19,84],"lack":[20],"native":[21],"support":[22],"for":[23],"sparse":[24,39,81,158],"computation,":[25],"resulting":[26,56],"in":[27,133],"underutilized":[28],"performance.":[29],"We":[30,103],"propose":[31],"a":[32,43,108],"reordering":[33],"partitioning":[35,91,110],"framework":[36],"that":[37,65],"transforms":[38],"parameter":[40],"tensors":[41,57],"into":[42,50,60],"singly-bordered":[44],"block":[45],"diagonal":[46,51],"form,":[47],"concentrating":[48],"nonzeros":[49],"border":[53],"blocks.":[54],"The":[55],"are":[58],"split":[59],"smaller":[61],"denser":[63],"sublayers":[64,75],"enable":[66],"parallel":[67],"execution":[68],"across":[69,140],"compute":[70,93],"units.":[71],"In":[72],"sparsity-aware":[73],"runtimes,":[74],"can":[76],"run":[77],"concurrently":[78],"using":[79],"specialized":[80],"kernels.":[82,102],"On":[83],"limited":[85],"to":[86,115],"dense":[87,101],"operations,":[88],"the":[89,97],"same":[90],"increases":[92],"density":[94],"improves":[96],"efficiency":[98],"of":[99,157],"standard":[100],"formulate":[104],"this":[105],"transformation":[106],"as":[107],"hypergraph":[109],"problem":[111],"extend":[113],"it":[114],"reorder":[116],"multiple":[117],"layers":[118],"jointly,":[119],"costly":[121],"activation":[122],"reshuffling.":[123],"Experiments":[124],"on":[125],"synthetic":[126],"real-world":[128],"demonstrate":[130],"significant":[131],"reductions":[132],"compute,":[134],"usage,":[136],"latency":[139],"diverse":[141],"targets.":[143],"Our":[144],"method":[145],"complements":[146],"existing":[147],"compilers":[148],"pipelines,":[151],"enabling":[152],"scalable":[153],"efficient":[155],"deployment":[156],"models.":[159]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-16T00:00:00"}
