{"id":"https://openalex.org/W4200504397","doi":"https://doi.org/10.1109/hpec49654.2021.9622801","title":"GCN Inference Acceleration using High-Level Synthesis","display_name":"GCN Inference Acceleration using High-Level Synthesis","publication_year":2021,"publication_date":"2021-09-20","ids":{"openalex":"https://openalex.org/W4200504397","doi":"https://doi.org/10.1109/hpec49654.2021.9622801"},"language":"en","primary_location":{"id":"doi:10.1109/hpec49654.2021.9622801","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec49654.2021.9622801","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080825397","display_name":"Yi Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yi Chien Lin","raw_affiliation_strings":["University of Southern California, Los Angeles, California"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, California","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101916609","display_name":"Bingyi Zhang","orcid":"https://orcid.org/0000-0002-8115-0814"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bingyi Zhang","raw_affiliation_strings":["University of Southern California, Los Angeles, California"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, California","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033166029","display_name":"Viktor K. Prasanna","orcid":"https://orcid.org/0000-0002-1609-8589"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Viktor Prasanna","raw_affiliation_strings":["University of Southern California, Los Angeles, California"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, California","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5080825397"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":1.9036,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.88567415,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8301734328269958},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6846040487289429},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6312234401702881},{"id":"https://openalex.org/keywords/design-space-exploration","display_name":"Design space exploration","score":0.5879493355751038},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.5733294486999512},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.572058916091919},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5648888349533081},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5462051630020142},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5290666222572327},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.5227146744728088},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4632358253002167},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.45076286792755127},{"id":"https://openalex.org/keywords/high-level-synthesis","display_name":"High-level synthesis","score":0.42506933212280273},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4221556484699249},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3616800010204315},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.34560829401016235},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2195415198802948}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8301734328269958},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6846040487289429},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6312234401702881},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.5879493355751038},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.5733294486999512},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.572058916091919},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5648888349533081},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5462051630020142},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5290666222572327},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.5227146744728088},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4632358253002167},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.45076286792755127},{"id":"https://openalex.org/C58013763","wikidata":"https://www.wikidata.org/wiki/Q5754574","display_name":"High-level synthesis","level":3,"score":0.42506933212280273},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4221556484699249},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3616800010204315},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.34560829401016235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2195415198802948},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec49654.2021.9622801","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec49654.2021.9622801","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8999999761581421,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2045271686","https://openalex.org/W2604314403","https://openalex.org/W2807021761","https://openalex.org/W2809418595","https://openalex.org/W2916106175","https://openalex.org/W2945827377","https://openalex.org/W2951136539","https://openalex.org/W2961295589","https://openalex.org/W2964015378","https://openalex.org/W2964571482","https://openalex.org/W3017228913","https://openalex.org/W3046757167","https://openalex.org/W3080555959","https://openalex.org/W3093177412","https://openalex.org/W3100848837","https://openalex.org/W3101553402","https://openalex.org/W4288419263","https://openalex.org/W6726873649","https://openalex.org/W6735632219","https://openalex.org/W6760001035","https://openalex.org/W6760045743","https://openalex.org/W6764171799","https://openalex.org/W6765543928","https://openalex.org/W6781932242","https://openalex.org/W6784286654"],"related_works":["https://openalex.org/W1741246166","https://openalex.org/W1508949720","https://openalex.org/W1855080238","https://openalex.org/W4281926497","https://openalex.org/W2890557332","https://openalex.org/W3048824278","https://openalex.org/W3022644913","https://openalex.org/W2077629192","https://openalex.org/W2269990635","https://openalex.org/W3206653210"],"abstract_inverted_index":{"GCN":[0,27,38,66,159],"(Graph":[1],"Convolutional":[2],"Network)":[3],"has":[4],"become":[5],"a":[6,33,37,56,115,162],"promising":[7],"solution":[8],"for":[9],"many":[10],"applications,":[11],"such":[12],"as":[13],"recommendation":[14],"systems,":[15],"social":[16],"data":[17,71,92],"mining,":[18],"etc.":[19],"Many":[20],"of":[21,36,65,217,225],"these":[22],"applications":[23],"requires":[24],"low":[25],"latency":[26,152,175,216],"inference.In":[28],"this":[29],"paper,":[30],"we":[31,54,80,95],"provide":[32,81],"case":[34],"study":[35],"inference":[39,160],"acceleration":[40],"on":[41,114,144,161,184],"FPGA.":[42],"We":[43,110,128],"explore":[44],"high-level":[45],"synthesis":[46],"programming":[47],"model":[48],"to":[49,60,69,100,156],"achieve":[50,90],"low-latency":[51],"inference.":[52],"First,":[53],"propose":[55],"partition-centric":[57],"mapping":[58],"strategy":[59],"map":[61],"the":[62,174,185,205,210,215],"execution":[63],"tasks":[64],"onto":[67],"FPGA":[68,117],"exploit":[70],"reuse,":[72],"which":[73,104],"reduces":[74,173],"external":[75],"memory":[76,87],"access":[77],"overhead.":[78],"Second,":[79],"HLS-based":[82],"kernel":[83],"design":[84,97,113,131,172,191,219,229],"with":[85,132,167,188,204,209],"improved":[86],"performance":[88],"and":[89,126,138,147,153,178,197],"massive":[91],"parallelism.":[93],"Third,":[94],"perform":[96,157],"space":[98],"exploration":[99],"facilitate":[101],"feasible":[102],"pre-placement":[103],"avoids":[105],"potential":[106],"Place-and-Route":[107],"(PnR)":[108],"failures.":[109],"evaluate":[111],"our":[112,130,171,190,218,228],"state-of-the-art":[116,134],"platform":[118],"using":[119],"three":[120],"commonly":[121],"used":[122],"datasets:":[123],"Reddit,":[124],"Yelp":[125],"Amazon-2M.":[127],"compare":[129],"two":[133],"libraries":[135],"PyTorch-Geometric":[136],"(PyG)":[137],"Deep":[139],"Graph":[140],"Library":[141],"(DGL)":[142],"running":[143],"high-end":[145],"CPU":[146,169,206],"GPU":[148,212],"by":[149,176],"evaluating":[150],"their":[151],"energy":[154,182,201,233],"efficiency":[155],"full-batch":[158],"two-layer":[163],"Vanilla-GCN":[164],"model.":[165],"Compared":[166,187,208],"PyG":[168],"version,":[170,213],"59.95\u00d7":[177],"is":[179,198,220,230],"96.22\u00d7":[180],"more":[181,200,232],"efficient":[183,202],"average.":[186],"DGL,":[189],"achieves":[192],"2.9":[193],"\u00d7":[194,222],"\u20136.4\u00d7":[195],"speedup":[196],"5.87\u00d7":[199],"compared":[203],"version.":[207],"DGL":[211,226],"although":[214],"1.67":[221],"\u20132.5\u00d7":[223],"that":[224],"GPU,":[227],"1.8\u00d7":[231],"efficient.":[234]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
