{"id":"https://openalex.org/W4402129561","doi":"https://doi.org/10.1145/3691636","title":"Graph-OPU: A Highly Flexible FPGA-Based Overlay Processor for Graph Neural Networks","display_name":"Graph-OPU: A Highly Flexible FPGA-Based Overlay Processor for Graph Neural Networks","publication_year":2024,"publication_date":"2024-09-02","ids":{"openalex":"https://openalex.org/W4402129561","doi":"https://doi.org/10.1145/3691636"},"language":"en","primary_location":{"id":"doi:10.1145/3691636","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3691636","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3691636","source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3691636","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056576567","display_name":"Enhao Tang","orcid":"https://orcid.org/0000-0002-0539-8885"},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]},{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Enhao Tang","raw_affiliation_strings":["School of Microelectronics, Fudan University, Shanghai Shi, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Fudan University, Shanghai Shi, China","institution_ids":["https://openalex.org/I4210132426","https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100442626","display_name":"Shun Li","orcid":"https://orcid.org/0000-0001-9003-8966"},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]},{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shun Li","raw_affiliation_strings":["School of Microelectronics, Fudan University, Shanghai Shi, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Fudan University, Shanghai Shi, China","institution_ids":["https://openalex.org/I4210132426","https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100761386","display_name":"Ruiqi Chen","orcid":"https://orcid.org/0000-0001-6837-5675"},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]},{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruiqi Chen","raw_affiliation_strings":["School of Microelectronics, Fudan University, Shanghai Shi, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Fudan University, Shanghai Shi, China","institution_ids":["https://openalex.org/I4210132426","https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102185842","display_name":"Hao Zhou","orcid":"https://orcid.org/0009-0003-8421-1242"},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]},{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Zhou","raw_affiliation_strings":["School of Microelectronics, Fudan University, Shanghai Shi, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Fudan University, Shanghai Shi, China","institution_ids":["https://openalex.org/I4210132426","https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053646739","display_name":"Yuhanxiao Ma","orcid":"https://orcid.org/0000-0003-0958-6397"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuhanxiao Ma","raw_affiliation_strings":["New York University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"New York University, New York, NY, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056063611","display_name":"Haoyang Zhang","orcid":"https://orcid.org/0000-0003-4496-4752"},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]},{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyang Zhang","raw_affiliation_strings":["School of Microelectronics, Fudan University, Shanghai Shi, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Fudan University, Shanghai Shi, China","institution_ids":["https://openalex.org/I4210132426","https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103098516","display_name":"Jun Yu","orcid":"https://orcid.org/0000-0003-4286-9292"},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]},{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yu","raw_affiliation_strings":["School of Microelectronics, Fudan University, Shanghai Shi, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Fudan University, Shanghai Shi, China","institution_ids":["https://openalex.org/I4210132426","https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015932760","display_name":"Kun Wang","orcid":"https://orcid.org/0000-0002-7288-1789"},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]},{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Wang","raw_affiliation_strings":["School of Microelectronics, Fudan University, Shanghai Shi, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Fudan University, Shanghai Shi, China","institution_ids":["https://openalex.org/I4210132426","https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5056576567"],"corresponding_institution_ids":["https://openalex.org/I24943067","https://openalex.org/I4210132426"],"apc_list":null,"apc_paid":null,"fwci":2.5459,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.90805895,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"17","issue":"4","first_page":"1","last_page":"33"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8664565086364746},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5836657285690308},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.568882942199707},{"id":"https://openalex.org/keywords/overlay","display_name":"Overlay","score":0.533292293548584},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.38299813866615295},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.2743508219718933},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.2425622045993805},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.22138768434524536}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8664565086364746},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5836657285690308},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.568882942199707},{"id":"https://openalex.org/C136085584","wikidata":"https://www.wikidata.org/wiki/Q910289","display_name":"Overlay","level":2,"score":0.533292293548584},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.38299813866615295},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2743508219718933},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2425622045993805},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.22138768434524536}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3691636","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3691636","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3691636","source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3691636","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3691636","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3691636","source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.9100000262260437,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402129561.pdf"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W1990832096","https://openalex.org/W2023930909","https://openalex.org/W2101028855","https://openalex.org/W2116341502","https://openalex.org/W2907492528","https://openalex.org/W2915621743","https://openalex.org/W2979455536","https://openalex.org/W2997106510","https://openalex.org/W3004208721","https://openalex.org/W3017228913","https://openalex.org/W3035708866","https://openalex.org/W3045200674","https://openalex.org/W3046757167","https://openalex.org/W3090369187","https://openalex.org/W3094497296","https://openalex.org/W3105753905","https://openalex.org/W3156650687","https://openalex.org/W3172512547","https://openalex.org/W3173856255","https://openalex.org/W3206743063","https://openalex.org/W4200504397","https://openalex.org/W4206367183","https://openalex.org/W4226439885","https://openalex.org/W4229487452","https://openalex.org/W4283206530","https://openalex.org/W4308083831","https://openalex.org/W4321637192","https://openalex.org/W4360831816","https://openalex.org/W4381327585","https://openalex.org/W4383749557","https://openalex.org/W4388214724"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W595346907","https://openalex.org/W153296825","https://openalex.org/W598989511","https://openalex.org/W2375779923","https://openalex.org/W2041986468","https://openalex.org/W1967800214","https://openalex.org/W2055675609"],"abstract_inverted_index":{"Field-programmable":[0],"gate":[1],"arrays":[2],"(FPGAs)":[3],"are":[4,174],"an":[5,256,273,289,293],"ideal":[6],"candidate":[7],"for":[8,38,80,89,115,251,297],"accelerating":[9],"graph":[10],"neural":[11],"networks":[12],"(GNNs).":[13],"However,":[14,47],"the":[15,36,61,92,110,116,126,130,134,139,171,186,196,202,283],"FPGA":[16,39,105],"redeployment":[17,40],"process":[18,118],"is":[19,98],"time-consuming":[20],"when":[21,41],"updating":[22],"or":[23],"switching":[24,42,53],"between":[25,43,176],"diverse":[26],"GNN":[27,33,45,81,96,121,204,285],"models":[28,97,205],"across":[29],"different":[30,44,120],"applications.":[31],"Existing":[32],"processors":[34],"eliminate":[35],"need":[37],"models.":[46,122],"adapting":[48],"matrix":[49,151,157,162],"multiplication":[50,152,158,163],"types":[51],"by":[52,255,265],"processing":[54],"units":[55,173],"decreases":[56],"hardware":[57,69,187,193],"utilization.":[58,188],"In":[59],"addition,":[60],"bandwidth":[62,143],"of":[63,95,119,141,181,258,292],"DDR":[64],"limits":[65],"further":[66],"improvements":[67],"in":[68,133,279],"performance.":[70,167],"This":[71],"article":[72],"proposes":[73],"a":[74,149,192],"highly":[75],"flexible":[76],"FPGA-based":[77,294],"overlay":[78,249,295],"processor":[79,296],"accelerations.":[82],"Graph-OPU":[83,166,169,213,244,271,287],"provides":[84],"excellent":[85],"flexibility":[86],"and":[87,101,112,128,160,178,200,219,230,241,261,303],"programmability":[88],"users,":[90],"as":[91,223,225],"executable":[93],"code":[94],"automatically":[99],"compiled":[100],"reloaded":[102],"without":[103],"requiring":[104],"redeployment.":[106],"First,":[107],"we":[108,124,147,190],"customize":[109,125],"compiler":[111],"instruction":[113],"sets":[114],"inference":[117],"Second,":[123],"datapath":[127],"optimize":[129],"data":[131],"format":[132],"microarchitecture":[135],"to":[136,153,216,227,237],"fully":[137],"leverage":[138],"advantages":[140],"high":[142,300],"memory":[144],"(HBM).":[145],"Third,":[146],"design":[148],"unified":[150],"handle":[154],"both":[155],"sparse-dense":[156],"(SpMM)":[159],"general":[161],"(GEMM),":[164],"enhancing":[165],"During":[168],"execution,":[170],"computational":[172],"shared":[175],"SpMM":[177],"GEMM":[179],"instead":[180],"being":[182],"switched,":[183],"which":[184],"improves":[185],"Finally,":[189],"implement":[191],"prototype":[194],"on":[195,239,268],"Xilinx":[197],"Alveo":[198],"U50":[199],"test":[201],"mainstream":[203],"using":[206],"various":[207],"datasets.":[208],"Experimental":[209],"results":[210],"show":[211],"that":[212],"achieves":[214],"up":[215,226],"1,654":[217],"\\(\\times\\)":[218,221,229,232,260,267,276],"63":[220],"speedup,":[222,302],"well":[224],"5,305":[228],"422":[231],"energy":[233,263,304],"efficiency":[234,264],"boosts,":[235],"compared":[236],"implementations":[238],"CPU":[240],"GPU,":[242],"respectively.":[243],"outperforms":[245],"state-of-the-art":[246],"(SOTA)":[247],"end-to-end":[248,280],"accelerators":[250],"GNN,":[252],"reducing":[253],"latency":[254,281],"average":[257,274],"1.36":[259],"improving":[262],"1.41":[266],"average.":[269],"Moreover,":[270],"exhibits":[272],"1.45":[275],"speed":[277],"improvement":[278],"over":[282],"SOTA":[284],"processor.":[286],"represents":[288],"in-depth":[290],"study":[291],"GNNs,":[298],"offering":[299],"flexibility,":[301],"efficiency.":[305]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
