{"id":"https://openalex.org/W4378804724","doi":"https://doi.org/10.1109/tcad.2023.3281714","title":"Algorithm/Hardware Co-Optimization for Sparsity-Aware SpMM Acceleration of GNNs","display_name":"Algorithm/Hardware Co-Optimization for Sparsity-Aware SpMM Acceleration of GNNs","publication_year":2023,"publication_date":"2023-05-31","ids":{"openalex":"https://openalex.org/W4378804724","doi":"https://doi.org/10.1109/tcad.2023.3281714"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2023.3281714","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2023.3281714","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061329069","display_name":"Yingxue Gao","orcid":"https://orcid.org/0000-0002-0833-1379"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yingxue Gao","raw_affiliation_strings":["Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072915437","display_name":"Lei Gong","orcid":"https://orcid.org/0000-0002-8391-5526"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Gong","raw_affiliation_strings":["Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011884768","display_name":"Chao Wang","orcid":"https://orcid.org/0000-0002-9403-5575"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Wang","raw_affiliation_strings":["Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089833377","display_name":"Teng Wang","orcid":"https://orcid.org/0000-0002-7281-1203"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Teng Wang","raw_affiliation_strings":["Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100407749","display_name":"Xi Li","orcid":"https://orcid.org/0009-0003-7871-5401"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xi Li","raw_affiliation_strings":["Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077322091","display_name":"Xuehai Zhou","orcid":"https://orcid.org/0000-0002-8360-3143"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuehai Zhou","raw_affiliation_strings":["Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5061329069"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":4.9672,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.96147352,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"42","issue":"12","first_page":"4763","last_page":"4776"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9657999873161316,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.7033767700195312},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6833062171936035},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.49436044692993164},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.40458235144615173},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3544310927391052},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.33443766832351685},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.29931557178497314},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1577049195766449},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.137327641248703},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08716797828674316}],"concepts":[{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.7033767700195312},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6833062171936035},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.49436044692993164},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.40458235144615173},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3544310927391052},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.33443766832351685},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.29931557178497314},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1577049195766449},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.137327641248703},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08716797828674316},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2023.3281714","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2023.3281714","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1443428683","display_name":null,"funder_award_id":"62102383","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2701524985","display_name":null,"funder_award_id":"BK20210123","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"},{"id":"https://openalex.org/G6472083938","display_name":null,"funder_award_id":"62172380","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6489310523","display_name":null,"funder_award_id":"Y2021121","funder_id":"https://openalex.org/F4320321133","funder_display_name":"Chinese Academy of Sciences"},{"id":"https://openalex.org/G8749416846","display_name":null,"funder_award_id":"2022YFB4501603","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8758304440","display_name":null,"funder_award_id":"61976200","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1565832774","https://openalex.org/W2048266589","https://openalex.org/W2067247412","https://openalex.org/W2094756095","https://openalex.org/W2152839228","https://openalex.org/W2520083297","https://openalex.org/W2584785188","https://openalex.org/W2605347906","https://openalex.org/W2612076670","https://openalex.org/W2794952988","https://openalex.org/W2883927138","https://openalex.org/W2906043559","https://openalex.org/W2933438941","https://openalex.org/W2935331687","https://openalex.org/W2979310060","https://openalex.org/W2979455536","https://openalex.org/W2996351509","https://openalex.org/W3008730548","https://openalex.org/W3016832937","https://openalex.org/W3017228913","https://openalex.org/W3018935228","https://openalex.org/W3047846843","https://openalex.org/W3098863686","https://openalex.org/W3103168911","https://openalex.org/W3121828480","https://openalex.org/W3132695675","https://openalex.org/W3134265729","https://openalex.org/W3144812654","https://openalex.org/W3154688839","https://openalex.org/W3155922894","https://openalex.org/W3155936517","https://openalex.org/W3158371160","https://openalex.org/W3158831985","https://openalex.org/W3198975860","https://openalex.org/W3214431650","https://openalex.org/W4200042293","https://openalex.org/W4225751948","https://openalex.org/W4231846589","https://openalex.org/W4283793952","https://openalex.org/W4284704474","https://openalex.org/W4312678586","https://openalex.org/W6603201521","https://openalex.org/W6681885597","https://openalex.org/W6793793997"],"related_works":["https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2565094479","https://openalex.org/W2390829436","https://openalex.org/W1989791859","https://openalex.org/W602859758","https://openalex.org/W1971289376","https://openalex.org/W2379101322","https://openalex.org/W2146872326","https://openalex.org/W3158825072"],"abstract_inverted_index":{"In":[0],"recent":[1],"years,":[2],"graph":[3],"neural":[4],"networks":[5],"(GNNs)":[6],"have":[7,47],"achieved":[8],"impressive":[9],"performance":[10,31,75],"in":[11,80],"various":[12],"application":[13],"fields":[14],"by":[15],"extracting":[16],"information":[17],"from":[18],"graph-structured":[19],"data.":[20],"It":[21],"contains":[22],"extensive":[23,70],"feature":[24,58],"aggregation":[25,59,139],"operations":[26],"and":[27,69,112,127,152,172,186,191,206,258],"has":[28],"become":[29],"a":[30,38,100,141,189],"bottleneck,":[32],"which":[33,198],"can":[34],"be":[35],"abstracted":[36],"as":[37],"specialized":[39],"sparse-dense":[40],"matrix":[41],"multiplication":[42],"(SpMM)":[43],"operation.":[44],"Previous":[45],"works":[46],"leveraged":[48],"the":[49,57,74,96,118,137,154,166,178,182,200,208,217,248,254,269],"inner":[50],"product":[51,54],"or":[52],"outer":[53],"to":[55,65,89,109,147,164,194,203],"accelerate":[56],"process.":[60],"However,":[61],"inefficient":[62],"execution":[63],"leads":[64],"extremely":[66],"unbalanced":[67],"workloads":[68],"intermediate":[71],"data,":[72],"hampering":[73],"of":[76,169],"previous":[77,249],"processors.":[78],"So":[79],"this":[81],"article,":[82],"we":[83,213],"demonstrate":[84,226],"an":[85],"algorithm/hardware":[86],"co-optimization":[87],"chance":[88],"enhance":[90],"SpMM":[91,102,184,196],"acceleration":[92],"for":[93],"GNNs.":[94],"First,":[95],"algorithm":[97,103,160,185],"part":[98,180],"develops":[99],"dataflow-efficient":[101],"that":[104,227],"integrates":[105],"three":[106],"optimization":[107,143],"methods":[108],"mitigate":[110],"computation":[111],"memory":[113],"access":[114],"inefficiencies.":[115],"Specifically,":[116],"1)":[117],"proposed":[119,183],"equal-value":[120],"partition":[121,126],"method":[122,144],"achieves":[123,229],"fine-grained":[124],"data":[125,132,150],"enables":[128],"load":[129],"balancing":[130],"during":[131],"movement;":[133],"2)":[134],"after":[135],"observing":[136],"vertex":[138],"phenomenon,":[140],"vertex-clustering":[142],"is":[145,161],"presented":[146],"enable":[148,165],"significant":[149],"locality;":[151],"3)":[153],"adaptive":[155,201],"dataflow":[156,202],"based":[157],"on":[158,216,253],"Gustavson\u2019s":[159],"further":[162],"implemented":[163],"efficient":[167,192],"distribution":[168],"sparse":[170],"elements":[171],"improves":[173],"computing":[174],"resource":[175],"utilization.":[176],"Then,":[177],"hardware":[179],"features":[181],"customizes":[187],"SDMA,":[188],"flexible":[190],"accelerator":[193,222],"boost":[195],"acceleration,":[197],"follows":[199],"eliminate":[204],"sparsity":[205],"explore":[207],"regular":[209],"parallelism":[210],"dimension.":[211],"Finally,":[212],"prototype":[214],"SDMA":[215,228],"Xilinx":[218],"Alveo":[219],"U280":[220],"FPGA":[221,271],"card.":[223],"The":[224],"results":[225],"<inline-formula":[230,238,259],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[231,239,260],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[232,240,261],"<tex-math":[233,241,262],"notation=\"LaTeX\">$5.68\\times":[234],"$":[235,243,264],"</tex-math></inline-formula>":[236,244,265],"\u2013":[237],"notation=\"LaTeX\">$14.68\\times":[242],"energy":[245],"efficiency":[246],"over":[247,268],"GPU":[250],"implementations":[251],"deployed":[252],"Nvidia":[255],"GTX":[256],"1080Ti":[257],"notation=\"LaTeX\">$1.32\\times":[263],"higher":[266],"throughput":[267],"state-of-the-art":[270],"prototype.":[272]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
