{"id":"https://openalex.org/W4416962709","doi":"https://doi.org/10.1109/sbac-pad66369.2025.00024","title":"Accelerating GNN Inference via Automated Parallel Execution on Edge Heterogeneous Platforms","display_name":"Accelerating GNN Inference via Automated Parallel Execution on Edge Heterogeneous Platforms","publication_year":2025,"publication_date":"2025-10-28","ids":{"openalex":"https://openalex.org/W4416962709","doi":"https://doi.org/10.1109/sbac-pad66369.2025.00024"},"language":null,"primary_location":{"id":"doi:10.1109/sbac-pad66369.2025.00024","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sbac-pad66369.2025.00024","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/SBC 37th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101958449","display_name":"Yi-Chien Lin","orcid":"https://orcid.org/0000-0002-1710-1532"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yi-Chien Lin","raw_affiliation_strings":["University of Southern California (USC),Los Angeles,CA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Southern California (USC),Los Angeles,CA,USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037580197","display_name":"Haoyang Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoyang Fan","raw_affiliation_strings":["University of Southern California (USC),Los Angeles,CA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Southern California (USC),Los Angeles,CA,USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007152667","display_name":"Sameh Gobriel","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sameh Gobriel","raw_affiliation_strings":["Intel Labs,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intel Labs,USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109332170","display_name":"Nilesh Jain","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nilesh Jain","raw_affiliation_strings":["Intel Labs,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intel Labs,USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114233272","display_name":"Viktor K. Prasanna","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Viktor K. Prasanna","raw_affiliation_strings":["University of Southern California (USC),Los Angeles,CA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Southern California (USC),Los Angeles,CA,USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101958449"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":1.8427,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.90192972,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"168","last_page":"179"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.33059999346733093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.33059999346733093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.2599000036716461,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.2054000049829483,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5795000195503235},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.559499979019165},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5295000076293945},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.4837000072002411},{"id":"https://openalex.org/keywords/heterogeneous-network","display_name":"Heterogeneous network","score":0.47920000553131104},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4544000029563904},{"id":"https://openalex.org/keywords/stream-processing","display_name":"Stream processing","score":0.4440000057220459}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8495000004768372},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5795000195503235},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.559499979019165},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5295000076293945},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.4837000072002411},{"id":"https://openalex.org/C158207573","wikidata":"https://www.wikidata.org/wiki/Q5747224","display_name":"Heterogeneous network","level":4,"score":0.47920000553131104},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4544000029563904},{"id":"https://openalex.org/C107027933","wikidata":"https://www.wikidata.org/wiki/Q2006448","display_name":"Stream processing","level":2,"score":0.4440000057220459},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4424999952316284},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.4348999857902527},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.42910000681877136},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.4068000018596649},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.39489999413490295},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3619999885559082},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.35350000858306885},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.29840001463890076},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2838999927043915},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26420000195503235},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.2632000148296356},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.25949999690055847}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sbac-pad66369.2025.00024","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sbac-pad66369.2025.00024","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/SBC 37th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1562653311","https://openalex.org/W1570111175","https://openalex.org/W2028240990","https://openalex.org/W2070307035","https://openalex.org/W2106076593","https://openalex.org/W2107285554","https://openalex.org/W2149294210","https://openalex.org/W2153808006","https://openalex.org/W2771111398","https://openalex.org/W2968216808","https://openalex.org/W2969388332","https://openalex.org/W2970929262","https://openalex.org/W2991907795","https://openalex.org/W3043571714","https://openalex.org/W3086105743","https://openalex.org/W3086449553","https://openalex.org/W3096566397","https://openalex.org/W3097300053","https://openalex.org/W3123179168","https://openalex.org/W3131920484","https://openalex.org/W3132107458","https://openalex.org/W3137147200","https://openalex.org/W3159953606","https://openalex.org/W3160405885","https://openalex.org/W3169512402","https://openalex.org/W3172335055","https://openalex.org/W3172512547","https://openalex.org/W3175548485","https://openalex.org/W3186289964","https://openalex.org/W4211165432","https://openalex.org/W4231449374","https://openalex.org/W4249233516","https://openalex.org/W4283314135","https://openalex.org/W4293024010","https://openalex.org/W4312910656","https://openalex.org/W4318541522","https://openalex.org/W4321637087","https://openalex.org/W4384834998","https://openalex.org/W4390873421","https://openalex.org/W4391623921","https://openalex.org/W4392251585","https://openalex.org/W4402475803","https://openalex.org/W4402475839"],"related_works":[],"abstract_inverted_index":{"Recently,":[0],"Graph":[1],"Neural":[2],"Networks":[3],"(GNN)":[4],"have":[5,33],"been":[6],"integrated":[7],"into":[8,35,103],"various":[9,127],"local":[10,14,18],"applications,":[11],"such":[12],"as":[13],"community":[15],"detection":[16],"and":[17,43,74,88,106,131,185,194,204],"code":[19],"assistant,":[20],"making":[21],"edge":[22,31,60,158],"inference":[23,58],"increasingly":[24],"important.":[25],"To":[26,45,91],"support":[27,209],"diverse":[28],"workloads,":[29],"state-of-the-art":[30,120,157,187],"devices":[32],"evolved":[34],"heterogeneous":[36,61,67,76,121,138,159],"platforms,":[37],"integrating":[38],"components":[39],"like":[40],"CPU,":[41],"GPU,":[42],"NPU.":[44],"this":[46],"end,":[47],"we":[48],"propose":[49],"GNX,":[50],"a":[51,71,75,169,177,186],"novel":[52],"GNN":[53,57,72,101,153,211],"system":[54],"that":[55,147,166],"accelerates":[56,150],"on":[59,155],"platforms":[62],"by":[63,119],"leveraging":[64,115],"all":[65],"the":[66,93,96,108,111,116,134,137,163,201],"processing":[68,139,171],"units.":[69],"Given":[70],"model":[73],"platform,":[77],"GNX":[78,99,123,148,173,189,207],"automatically":[79],"generates":[80],"parallel":[81,128],"execution":[82,129],"plans,":[83],"consisting":[84],"of":[85,95,203],"both":[86],"data":[87,183],"pipeline":[89],"parallelism.":[90],"reduce":[92],"complexity":[94],"design":[97],"space,":[98],"converts":[100],"models":[102,154],"coarse-grained":[104],"blocks":[105],"performs":[107],"search":[109],"at":[110],"block":[112],"level.":[113],"By":[114],"APIs":[117],"provided":[118],"frameworks,":[122],"can":[124],"flexibly":[125],"schedule":[126],"plans":[130],"seamlessly":[132],"adjust":[133],"workload":[135],"across":[136],"units":[140],"for":[141],"load-balanced":[142],"execution.":[143],"Our":[144],"study":[145],"shows":[146],"effectively":[149],"three":[151],"widely-used":[152],"two":[156],"platforms.":[160],"Compared":[161,180],"with":[162,181],"baseline":[164],"approach":[165],"uses":[167],"only":[168],"single":[170],"unit,":[172],"achieves":[174,190],"up":[175,191],"to":[176,192,206,208],"2.57\u00d7":[178],"speedup.":[179],"adopting":[182],"parallelism":[184],"scheduler,":[188],"1.90\u00d7":[193],"1.79\u00d7":[195],"speedup,":[196],"respectively.":[197],"We":[198],"also":[199],"discuss":[200],"applicability":[202],"extensions":[205],"other":[210],"models.":[212]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-30T09:04:40.226872","created_date":"2025-12-03T00:00:00"}
