{"id":"https://openalex.org/W4415250827","doi":"https://doi.org/10.1109/hpec67600.2025.11196684","title":"Towards Efficient Sparse Deep Neural Network Inference via Multi-level Concurrency Orchestration","display_name":"Towards Efficient Sparse Deep Neural Network Inference via Multi-level Concurrency Orchestration","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4415250827","doi":"https://doi.org/10.1109/hpec67600.2025.11196684"},"language":"en","primary_location":{"id":"doi:10.1109/hpec67600.2025.11196684","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196684","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058494044","display_name":"Ming Dun","orcid":"https://orcid.org/0000-0002-0664-9543"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ming Dun","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003189850","display_name":"Jie Zhou","orcid":"https://orcid.org/0000-0003-2406-3981"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Zhou","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012125082","display_name":"Huawei Cao","orcid":"https://orcid.org/0000-0003-1176-2521"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huawei Cao","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084238352","display_name":"Shuhan Song","orcid":"https://orcid.org/0009-0007-2997-3294"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuhan Song","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100540523","display_name":"Yiming Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiming Sun","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004624509","display_name":"Mingyu Yan","orcid":"https://orcid.org/0000-0002-6915-955X"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyu Yan","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023098180","display_name":"Xiaochun Ye","orcid":"https://orcid.org/0000-0003-4598-1685"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaochun Ye","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5058494044"],"corresponding_institution_ids":["https://openalex.org/I4210090176"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15085092,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.8698999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.8698999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.8680999875068665,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.8565000295639038,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6974999904632568},{"id":"https://openalex.org/keywords/concurrency","display_name":"Concurrency","score":0.5834000110626221},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5795000195503235},{"id":"https://openalex.org/keywords/orchestration","display_name":"Orchestration","score":0.49399998784065247},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.4659999907016754},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.447299987077713},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4275999963283539},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.39070001244544983},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.37549999356269836}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8690999746322632},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6974999904632568},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.5834000110626221},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5795000195503235},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5253000259399414},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5246000289916992},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.49399998784065247},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47839999198913574},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.4659999907016754},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.447299987077713},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4275999963283539},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.39070001244544983},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.37549999356269836},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.37439998984336853},{"id":"https://openalex.org/C17458331","wikidata":"https://www.wikidata.org/wiki/Q935672","display_name":"Spawn (biology)","level":2,"score":0.34929999709129333},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3294999897480011},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.32919999957084656},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.3260999917984009},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3158999979496002},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.3077000081539154},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3018999993801117},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C2775941552","wikidata":"https://www.wikidata.org/wiki/Q25212305","display_name":"Isolation (microbiology)","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.272599995136261},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.271699994802475},{"id":"https://openalex.org/C150495011","wikidata":"https://www.wikidata.org/wiki/Q128392","display_name":"Concurrent computing","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C2778047078","wikidata":"https://www.wikidata.org/wiki/Q82299449","display_name":"Tardiness","level":4,"score":0.25519999861717224},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec67600.2025.11196684","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196684","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null},{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2007339694","https://openalex.org/W2021908319","https://openalex.org/W2075244932","https://openalex.org/W2914020633","https://openalex.org/W2964537638","https://openalex.org/W2973134322","https://openalex.org/W2982157693","https://openalex.org/W3035371198","https://openalex.org/W3115410382","https://openalex.org/W3214762859","https://openalex.org/W4200090124","https://openalex.org/W4220818654","https://openalex.org/W4304192541","https://openalex.org/W4308083513","https://openalex.org/W4308090748","https://openalex.org/W4321636494","https://openalex.org/W4360831990","https://openalex.org/W4388858680","https://openalex.org/W4389742650","https://openalex.org/W4390188278","https://openalex.org/W4391986945","https://openalex.org/W4401090539","https://openalex.org/W4401408710","https://openalex.org/W4402369978","https://openalex.org/W4408891420","https://openalex.org/W4408903522"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"years":[1],"have":[2],"witnessed":[3],"the":[4,64,71,109,136],"prosperity":[5],"of":[6,66,123],"Sparse":[7],"Deep":[8],"Neural":[9],"Networks":[10],"(SpDNNs),":[11],"as":[12],"they":[13],"are":[14],"more":[15],"parameter-efficient":[16],"and":[17,33,57,83,102,135,153],"cost-effective":[18],"than":[19],"their":[20],"dense":[21],"counterparts.":[22],"To":[23,69],"further":[24],"reduce":[25],"costs,":[26],"it":[27],"is":[28],"prevalent":[29],"for":[30],"AI":[31],"companies":[32],"research":[34],"groups":[35],"to":[36,99,118],"manage":[37],"GPU":[38,59,106],"servers":[39],"in":[40,96,145],"a":[41,93,150],"multi-tenant":[42],"manner":[43],"that":[44,139],"handles":[45],"multiple":[46,126],"inference":[47,97,127],"requests":[48],"simultaneously.":[49],"However,":[50],"SpDNNs":[51,79],"suffer":[52],"from":[53],"intricate":[54],"data":[55],"dependency":[56],"low":[58],"resource":[60],"utilization,":[61],"which":[62,77],"impedes":[63],"efficiency":[65],"batched":[67],"inference.":[68],"tackle":[70],"above":[72],"problems,":[73],"we":[74,91,112],"propose":[75],"CoSpDNN,":[76],"optimizes":[78],"through":[80,132],"exploiting":[81],"intra-kernel":[82],"inter-kernel":[84],"concurrency":[85,104],"on":[86,149],"GPUs.":[87],"On":[88,108],"one":[89],"hand,":[90,111],"design":[92],"fine-grained":[94],"pipeline":[95],"kernels":[98],"minimize":[100],"bubbles":[101],"improve":[103],"among":[105,125],"resources.":[107],"other":[110],"develop":[113],"an":[114],"adaptive":[115],"scheduling":[116],"strategy":[117],"enable":[119],"efficient":[120],"spatial":[121],"sharing":[122],"GPUs":[124],"tasks.":[128],"We":[129],"evaluate":[130],"CoSpDNN":[131],"extensive":[133],"experiments,":[134],"results":[137],"demonstrate":[138],"our":[140],"work":[141],"outperforms":[142],"previous":[143],"champions":[144],"task":[146],"completion":[147],"time":[148],"single":[151],"SpDNN":[152],"diverse":[154],"SpDNNs.":[155]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-16T00:00:00"}
