{"id":"https://openalex.org/W4308090748","doi":"https://doi.org/10.1109/hpec55821.2022.9926300","title":"Accelerating Sparse Deep Neural Network Inference Using GPU Tensor Cores","display_name":"Accelerating Sparse Deep Neural Network Inference Using GPU Tensor Cores","publication_year":2022,"publication_date":"2022-09-19","ids":{"openalex":"https://openalex.org/W4308090748","doi":"https://doi.org/10.1109/hpec55821.2022.9926300"},"language":"en","primary_location":{"id":"doi:10.1109/hpec55821.2022.9926300","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec55821.2022.9926300","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101905150","display_name":"Yufei Sun","orcid":"https://orcid.org/0000-0002-6520-6697"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yufei Sun","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","Zhejiang Lab, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]},{"raw_affiliation_string":"Zhejiang Lab, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103241222","display_name":"Long Zheng","orcid":"https://orcid.org/0000-0002-5812-7317"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Zheng","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","Zhejiang Lab, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]},{"raw_affiliation_string":"Zhejiang Lab, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115599465","display_name":"Qinggang Wang","orcid":"https://orcid.org/0000-0002-9951-3345"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinggang Wang","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","Zhejiang Lab, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]},{"raw_affiliation_string":"Zhejiang Lab, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114546959","display_name":"Xiangyu Ye","orcid":"https://orcid.org/0009-0008-1285-5176"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Ye","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","Zhejiang Lab, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]},{"raw_affiliation_string":"Zhejiang Lab, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077925334","display_name":"Yu Huang","orcid":"https://orcid.org/0000-0003-3699-4708"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Huang","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","Zhejiang Lab, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]},{"raw_affiliation_string":"Zhejiang Lab, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010485648","display_name":"Pengcheng Yao","orcid":"https://orcid.org/0000-0003-4701-2239"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengcheng Yao","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","Zhejiang Lab, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]},{"raw_affiliation_string":"Zhejiang Lab, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022398389","display_name":"Xiaofei Liao","orcid":"https://orcid.org/0000-0001-6302-813X"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofei Liao","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022262922","display_name":"Hai Jin","orcid":"https://orcid.org/0000-0002-3934-7605"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai Jin","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Laboratory,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.9291,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.87507974,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7178943753242493},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.7003064751625061},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5666136741638184},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.5565497279167175},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5205501317977905},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5031432509422302},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4882247745990753},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.4355863928794861},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4321146607398987},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4247645139694214},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.4136638045310974},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4035723805427551},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36314430832862854},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1388421654701233},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.08777251839637756},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.0788850486278534}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7178943753242493},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.7003064751625061},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5666136741638184},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.5565497279167175},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5205501317977905},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5031432509422302},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4882247745990753},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4355863928794861},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4321146607398987},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4247645139694214},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.4136638045310974},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4035723805427551},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36314430832862854},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1388421654701233},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.08777251839637756},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0788850486278534},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec55821.2022.9926300","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec55821.2022.9926300","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.41999998688697815,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1986844835","https://openalex.org/W2012407419","https://openalex.org/W2040404421","https://openalex.org/W2194775991","https://openalex.org/W2276892413","https://openalex.org/W2551895583","https://openalex.org/W2626696598","https://openalex.org/W2768489488","https://openalex.org/W2791673912","https://openalex.org/W2799176105","https://openalex.org/W2901549770","https://openalex.org/W2953881420","https://openalex.org/W2962766617","https://openalex.org/W2964537638","https://openalex.org/W2969388332","https://openalex.org/W2973134322","https://openalex.org/W2990514424","https://openalex.org/W2990689488","https://openalex.org/W3014399784","https://openalex.org/W3043303806","https://openalex.org/W3100809954","https://openalex.org/W3100839241","https://openalex.org/W3101543398","https://openalex.org/W3104528661","https://openalex.org/W3113443077","https://openalex.org/W3115410382","https://openalex.org/W3117140649","https://openalex.org/W3130660608","https://openalex.org/W3132532188","https://openalex.org/W4200090124","https://openalex.org/W6632455782"],"related_works":["https://openalex.org/W2052993554","https://openalex.org/W2046125858","https://openalex.org/W2293771254","https://openalex.org/W2119413962","https://openalex.org/W2030992542","https://openalex.org/W3121828480","https://openalex.org/W2039875226","https://openalex.org/W4221142455","https://openalex.org/W2032786851","https://openalex.org/W2914631005"],"abstract_inverted_index":{"Sparse":[0,239],"deep":[1],"neural":[2],"networks":[3],"(SpDNN)":[4],"attract":[5],"a":[6,113,162,220],"lot":[7],"of":[8,14,33,156,178,184,234],"research":[9],"and":[10,135,144,228,237,265],"industry":[11],"attention":[12],"because":[13],"their":[15],"powerful":[16,101],"learning":[17],"capability,":[18],"whose":[19],"execution":[20],"time":[21],"is":[22,189],"dominated":[23],"by":[24],"the":[25,89,93,120,133,148,153,168,176,182,190,201,232,235,258,269],"sparse":[26,79],"matrix-dense":[27],"matrix":[28,37,80,115,122],"multiplication":[29,47],"(SpMM).":[30],"As":[31],"one":[32],"specialized":[34],"processors":[35],"for":[36,58],"multiplication,":[38],"NVIDIA":[39],"GPU":[40],"Tensor":[41,66,73,102,142,157,172,197],"Cores":[42,67,74,103,143,173,198],"can":[43],"perform":[44],"half-precision":[45,154],"matrix-matrix":[46],"with":[48],"higher":[49],"performance":[50,86],"than":[51],"CUDA":[52,145],"Cores,":[53,146,158],"which":[54,118],"provides":[55],"great":[56],"op-portunities":[57],"SpMM":[59,63],"acceleration.":[60],"However,":[61],"performing":[62],"efficiently":[64],"on":[65,141,171,196,219,262,273],"remains":[68],"tremendously":[69],"challenging.":[70],"First,":[71],"typical":[72],"do":[75],"not":[76],"handle":[77],"extremely":[78],"computations":[81],"well,":[82],"delivering":[83],"much":[84],"lower":[85],"compared":[87],"to":[88,104,123,166,192,212,226,253],"dense":[90],"counterparts.":[91],"Second,":[92,151],"single-precision":[94,169],"Challenge":[95],"dataset":[96],"prevents":[97],"them":[98],"from":[99],"leveraging":[100],"improve":[105],"performance.":[106],"To":[107,181],"this":[108,187],"end,":[109],"we":[110,159],"first":[111,191],"propose":[112,161],"similarity-based":[114],"transformation":[116],"scheme,":[117],"polarizes":[119],"weight":[121],"be":[124],"either":[125],"denser":[126,134],"or":[127],"sparser":[128,136],"in":[129],"local":[130],"regions.":[131],"Then":[132],"workloads":[137],"are":[138,250],"respectively":[139],"processed":[140],"boosting":[147],"overall":[149],"efficiency.":[150],"considering":[152],"limitation":[155],"further":[160],"lightweight":[163],"emulation":[164],"algorithm":[165],"achieve":[167],"computation":[170],"without":[174,199],"affecting":[175],"correctness":[177],"final":[179],"results.":[180],"best":[183],"our":[185,208,247],"knowl-edge,":[186],"paper":[188],"accelerate":[193],"SpDNN":[194],"inference":[195,217],"compromising":[200],"precision":[202],"requirement.":[203],"Extensive":[204],"experiments":[205],"validate":[206],"that":[207],"work":[209],"reaches":[210],"up":[211,225,252],"300":[213],"TeraEdges":[214],"per":[215],"second":[216],"throughput":[218],"single":[221],"A100":[222],"GPU,":[223],"yielding":[224],"89.41x":[227],"8.12x":[229],"speedups":[230],"against":[231],"champions":[233],"2020":[236,270],"2021":[238,259],"Deep":[240],"Neural":[241],"Network":[242],"Graph":[243],"Challenge,":[244],"respectively.":[245],"Moreover,":[246],"4-GPU":[248],"version":[249],"also":[251],"6.56":[254],"x":[255],"faster":[256,267],"over":[257,268],"champion":[260,271],"running":[261,272],"4":[263],"GPUs":[264],"7.55x":[266],"768":[274],"GPUs.":[275]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
