{"id":"https://openalex.org/W3201678711","doi":"https://doi.org/10.1145/3472456.3473511","title":"Automatic Generation of High-Performance Inference Kernels for Graph Neural Networks on Multi-Core Systems","display_name":"Automatic Generation of High-Performance Inference Kernels for Graph Neural Networks on Multi-Core Systems","publication_year":2021,"publication_date":"2021-08-09","ids":{"openalex":"https://openalex.org/W3201678711","doi":"https://doi.org/10.1145/3472456.3473511","mag":"3201678711"},"language":"en","primary_location":{"id":"doi:10.1145/3472456.3473511","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3472456.3473511","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3472456.3473511","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3472456.3473511","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101992907","display_name":"Qiang Fu","orcid":"https://orcid.org/0009-0004-7159-6515"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Qiang Fu","raw_affiliation_strings":["George Washington University"],"affiliations":[{"raw_affiliation_string":"George Washington University","institution_ids":["https://openalex.org/I193531525"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002254350","display_name":"H. Howie Huang","orcid":"https://orcid.org/0000-0001-8588-7680"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"H. Howie Huang","raw_affiliation_strings":["George Washington University"],"affiliations":[{"raw_affiliation_string":"George Washington University","institution_ids":["https://openalex.org/I193531525"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101992907"],"corresponding_institution_ids":["https://openalex.org/I193531525"],"apc_list":null,"apc_paid":null,"fwci":0.5826,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.68941671,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8426178693771362},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.6370960474014282},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6185173988342285},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5494817495346069},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5261924862861633},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5123404264450073},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.48331522941589355},{"id":"https://openalex.org/keywords/graph-database","display_name":"Graph database","score":0.44404229521751404},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4210565686225891},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42004865407943726},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.41311269998550415},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.4117286205291748},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.33021044731140137}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8426178693771362},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.6370960474014282},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6185173988342285},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5494817495346069},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5261924862861633},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5123404264450073},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.48331522941589355},{"id":"https://openalex.org/C176225458","wikidata":"https://www.wikidata.org/wiki/Q595971","display_name":"Graph database","level":3,"score":0.44404229521751404},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4210565686225891},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42004865407943726},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.41311269998550415},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.4117286205291748},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.33021044731140137}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3472456.3473511","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3472456.3473511","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3472456.3473511","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3472456.3473511","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3472456.3473511","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3472456.3473511","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5400000214576721,"id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G344781394","display_name":null,"funder_award_id":"1618706","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7416016778","display_name":null,"funder_award_id":"1618706,1717774","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3201678711.pdf","grobid_xml":"https://content.openalex.org/works/W3201678711.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W1536060130","https://openalex.org/W1788180225","https://openalex.org/W2000041758","https://openalex.org/W2034102265","https://openalex.org/W2053076698","https://openalex.org/W2170616854","https://openalex.org/W2186615578","https://openalex.org/W2402402867","https://openalex.org/W2468907370","https://openalex.org/W2560674852","https://openalex.org/W2600702321","https://openalex.org/W2624431344","https://openalex.org/W2768532803","https://openalex.org/W2783819585","https://openalex.org/W2784435047","https://openalex.org/W2786320458","https://openalex.org/W2804032941","https://openalex.org/W2807021761","https://openalex.org/W2951135776","https://openalex.org/W2951136539","https://openalex.org/W2951641587","https://openalex.org/W2963076818","https://openalex.org/W2963169753","https://openalex.org/W2963460103","https://openalex.org/W2970971581","https://openalex.org/W3080508754","https://openalex.org/W3100848837","https://openalex.org/W4234988573","https://openalex.org/W6713134421"],"related_works":["https://openalex.org/W2293118914","https://openalex.org/W2998381397","https://openalex.org/W4236419692","https://openalex.org/W2058965144","https://openalex.org/W2171015181","https://openalex.org/W3167919718","https://openalex.org/W4251718783","https://openalex.org/W2187181201","https://openalex.org/W2778498407","https://openalex.org/W3012895752"],"abstract_inverted_index":{"Graph":[0],"neural":[1,78,94],"networks":[2],"are":[3,53,106],"powerful":[4],"in":[5],"learning":[6],"from":[7],"high-dimensional":[8],"graph-structured":[9],"data,":[10],"for":[11,76,91],"which":[12,81,105],"a":[13,69,83],"number":[14],"of":[15,32],"frameworks":[16],"such":[17,33],"as":[18,102],"DGL":[19],"and":[20,30,112,138,147],"Pytorch-geometrics":[21],"have":[22],"been":[23],"developed":[24],"to":[25,59,156,161],"facilitate":[26],"the":[27,126],"construction,":[28],"training,":[29],"deployment":[31],"models.":[34,96],"Unfortunately,":[35],"existing":[36],"systems":[37,52],"underperform":[38],"when":[39],"inferring":[40],"on":[41,45,128],"huge":[42],"graph":[43,50,77,93,165],"data":[44],"multi-core":[46],"CPUs.":[47],"Furthermore,":[48],"traditional":[49,164],"processing":[51,166],"struggling":[54],"with":[55,143],"complexity":[56],"issues":[57],"due":[58],"their":[60],"low-level":[61],"programming":[62,89],"interfaces.":[63],"In":[64,151],"this":[65],"paper,":[66],"we":[67],"present":[68],"new":[70],"compiler-based":[71],"software":[72],"framework":[73],"Gin":[74,97,123,153],"optimized":[75],"network":[79,95],"inference,":[80],"offers":[82],"user-friendly":[84],"interface,":[85],"via":[86],"an":[87],"intuitive":[88],"model,":[90],"defining":[92],"builds":[98],"high-level":[99],"dataflow":[100],"graphs":[101],"intermediate":[103],"representations,":[104],"transformed":[107],"into":[108,115],"highly":[109],"efficient":[110],"codes":[111],"then":[113],"compiled":[114],"binary":[116],"inference":[117,127],"kernels.":[118],"Our":[119],"evaluation":[120],"shows":[121],"that":[122],"significantly":[124],"accelerates":[125],"billion-edge":[129],"graphs,":[130],"beating":[131],"three":[132],"state-of-the-art":[133],"solutions":[134],"i.e.,":[135],"DGL,":[136],"Tensorflow,":[137],"Pytorch-geometrics,":[139],"by":[140],"31.44\u00d7on":[141],"average,":[142],"much":[144],"higher":[145],"CPU":[146],"memory":[148],"bandwidth":[149],"utilization.":[150],"addition,":[152],"is":[154],"able":[155],"achieve":[157],"considerable":[158],"speedup":[159],"(up":[160],"7.6\u00d7)":[162],"over":[163],"system":[167],"Ligra.":[168]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
