{"id":"https://openalex.org/W4409133356","doi":"https://doi.org/10.1109/hpec62836.2024.10938434","title":"Distributed-Memory Sparse Deep Neural Network Inference Using Global Arrays","display_name":"Distributed-Memory Sparse Deep Neural Network Inference Using Global Arrays","publication_year":2024,"publication_date":"2024-09-23","ids":{"openalex":"https://openalex.org/W4409133356","doi":"https://doi.org/10.1109/hpec62836.2024.10938434"},"language":"en","primary_location":{"id":"doi:10.1109/hpec62836.2024.10938434","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec62836.2024.10938434","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060084266","display_name":"Bruce Palmer","orcid":"https://orcid.org/0000-0002-1933-2350"},"institutions":[{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bruce Palmer","raw_affiliation_strings":["Pacific Northwest National Laboratory,Richland,WA,USA"],"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Laboratory,Richland,WA,USA","institution_ids":["https://openalex.org/I142606810"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100670911","display_name":"Sayan Ghosh","orcid":"https://orcid.org/0000-0001-8758-7657"},"institutions":[{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sayan Ghosh","raw_affiliation_strings":["Pacific Northwest National Laboratory,Richland,WA,USA"],"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Laboratory,Richland,WA,USA","institution_ids":["https://openalex.org/I142606810"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102811479","display_name":"Andr\u00e9s M\u00e1rquez","orcid":"https://orcid.org/0000-0002-4313-1882"},"institutions":[{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andr\u00e9s M\u00e1rquez","raw_affiliation_strings":["Pacific Northwest National Laboratory,Richland,WA,USA"],"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Laboratory,Richland,WA,USA","institution_ids":["https://openalex.org/I142606810"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5060084266"],"corresponding_institution_ids":["https://openalex.org/I142606810"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26985602,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.921999990940094,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.921999990940094,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7662360668182373},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7007083892822266},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5054371356964111},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47151175141334534},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4135444760322571}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7662360668182373},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7007083892822266},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5054371356964111},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47151175141334534},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4135444760322571}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec62836.2024.10938434","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec62836.2024.10938434","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4499092682","display_name":null,"funder_award_id":"DE-AC06-76RL01830","funder_id":"https://openalex.org/F4320306250","funder_display_name":"Battelle"}],"funders":[{"id":"https://openalex.org/F4320306250","display_name":"Battelle","ror":"https://ror.org/01h5tnr73"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1495140291","https://openalex.org/W1521427322","https://openalex.org/W1527193372","https://openalex.org/W1965092590","https://openalex.org/W2007339694","https://openalex.org/W2012313745","https://openalex.org/W2014807599","https://openalex.org/W2031571229","https://openalex.org/W2050930161","https://openalex.org/W2069079195","https://openalex.org/W2108369438","https://openalex.org/W2109982223","https://openalex.org/W2140300123","https://openalex.org/W2461193710","https://openalex.org/W2898123186","https://openalex.org/W2973134322","https://openalex.org/W2999364476","https://openalex.org/W3115410382","https://openalex.org/W3116086216","https://openalex.org/W4200090124","https://openalex.org/W4230289604","https://openalex.org/W4236558082","https://openalex.org/W4242946001","https://openalex.org/W4252724804","https://openalex.org/W4290648346","https://openalex.org/W4308090748","https://openalex.org/W6738642365","https://openalex.org/W6765532259","https://openalex.org/W6767997687","https://openalex.org/W6790503700"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W3042419602","https://openalex.org/W2966649771"],"abstract_inverted_index":{"Partitioned":[0],"Global":[1,84,125,225],"Address":[2],"Space":[3],"(PGAS)":[4],"models":[5,43,62,99],"exhibit":[6],"tremendous":[7],"promise":[8],"in":[9,22,78,113,168,180,201,220],"developing":[10],"efficient":[11],"and":[12,33,105,109,139,194,217,236,244,277],"productive":[13,237],"distributed-memory":[14],"parallel":[15],"applications.":[16],"They":[17],"have":[18,44,122,163],"been":[19,45],"used":[20,112],"extensively":[21],"scientific":[23,178],"computations":[24,73],"due":[25],"to":[26,47,66,80,100,128,209,233,280],"conveniently":[27],"offering":[28],"a":[29,211,264],"\u201cshared-memory\u201d-like":[30],"programming":[31],"model":[32],"interfaces":[34],"that":[35,229],"separate":[36],"communication":[37,42,104],"with":[38,173],"synchronization.":[39],"Traditionally,":[40],"PGAS":[41,61,98],"applied":[46],"dense/contiguously":[48],"distributed":[49,68,106],"data,":[50],"but":[51],"most":[52],"modern":[53,185],"applications":[54,179],"contain":[55],"varied":[56],"levels":[57],"of":[58,95,154,213,252,275],"sparsity.":[59],"Existing":[60],"require":[63,75],"certain":[64],"adaptations":[65],"support":[67,129,210],"sparse":[69,131,134,140,155,159,214],"computations,":[70],"since":[71],"associated":[72],"often":[74,190],"matrix":[76,107,136,142,215],"arithmetic,":[77],"addition":[79],"data":[81,103],"movement.":[82],"The":[83,267],"Arrays":[85,126,226],"toolkit":[86,127],"from":[87],"Pacific":[88],"Northwest":[89],"National":[90],"Laboratory":[91],"(PNNL)":[92],"is":[93,110,208,231,271],"one":[94],"the":[96,114,124,152,221,250,282],"earliest":[97],"combine":[101],"one-sided":[102],"operations,":[108,132],"still":[111],"popular":[115],"NWChem":[116],"quantum":[117],"chemistry":[118],"suite.":[119],"Recently,":[120],"we":[121],"expanded":[123],"common":[130],"like":[133],"matrix-dense":[135],"multiplies":[137],"(SpMM)":[138],"matrix-sparse":[141],"multiplication":[143],"(SpGEMM).":[144],"As":[145],"it":[146,230],"turns":[147],"out,":[148],"these":[149],"operations":[150,216],"are":[151],"backbone":[153],"Deep":[156,257],"Learning":[157,239],"(DL);":[158],"deep":[160],"neural":[161],"networks":[162],"gained":[164],"increasing":[165],"attention":[166],"recently":[167],"achieving":[169],"speedups":[170],"on":[171,192,273],"inference":[172],"reduced":[174],"memory":[175],"footprints.":[176],"Unlike":[177],"High":[181],"Performance":[182],"Computing":[183],"(HPC),":[184],"(distributed-memory":[186],"capable)":[187],"DL":[188],"toolkits":[189],"rely":[191],"non-standardized":[193],"closed-source":[195],"vendor":[196],"software":[197,243],"optimizations,":[198],"creating":[199],"challenges":[200],"software-hardware":[202],"co-design":[203],"at":[204],"scale.":[205],"Our":[206],"goal":[207],"variety":[212],"helper":[218],"functions":[219],"newly":[222],"created":[223],"Sparse":[224,256],"(SGA),":[227],"such":[228],"possible":[232],"build":[234],"portable":[235],"Machine":[238],"scenarios":[240,262],"for":[241],"algorithm/":[242],"hardware":[245],"codesign":[246],"purposes.":[247],"We":[248],"demonstrate":[249],"usefulness":[251],"SGA":[253,269],"by":[254],"building":[255],"Neural":[258],"Network":[259],"(SpDNN)":[260],"challenge":[261],"as":[263],"case":[265],"study.":[266],"current":[268],"implementation":[270],"built":[272],"top":[274],"MPI":[276],"uses":[278],"CPUs":[279],"maximize":[281],"portability":[283],"across":[284],"platforms.":[285]},"counts_by_year":[],"updated_date":"2026-03-04T07:04:00.330322","created_date":"2025-10-10T00:00:00"}
