{"id":"https://openalex.org/W4301207796","doi":"https://doi.org/10.1145/3546000.3546018","title":"Optimize DGL Operations on x86-64 Multi-Core Processors","display_name":"Optimize DGL Operations on x86-64 Multi-Core Processors","publication_year":2022,"publication_date":"2022-06-23","ids":{"openalex":"https://openalex.org/W4301207796","doi":"https://doi.org/10.1145/3546000.3546018"},"language":"en","primary_location":{"id":"doi:10.1145/3546000.3546018","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3546000.3546018","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th International Conference on High Performance Compilation, Computing and Communications","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015573562","display_name":"Chaorun Liu","orcid":"https://orcid.org/0000-0001-7376-507X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chaorun Liu","raw_affiliation_strings":["National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070555846","display_name":"Huayou Su","orcid":"https://orcid.org/0000-0002-3587-0917"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huayou Su","raw_affiliation_strings":["National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051680867","display_name":"Yong Dou","orcid":"https://orcid.org/0000-0002-1256-8934"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Dou","raw_affiliation_strings":["National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101913669","display_name":"Qinglin Wang","orcid":"https://orcid.org/0000-0002-8286-6566"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinglin Wang","raw_affiliation_strings":["National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5015573562"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09230511,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"117","last_page":"123"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.982200026512146,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.9867662191390991},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7739421725273132},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6585920453071594},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5970094203948975},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4737440049648285},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4530302584171295},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.43495696783065796},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2262953817844391},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.19305303692817688},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1739864945411682},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.11615860462188721},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.05919739603996277}],"concepts":[{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.9867662191390991},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7739421725273132},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6585920453071594},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5970094203948975},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4737440049648285},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4530302584171295},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.43495696783065796},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2262953817844391},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.19305303692817688},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1739864945411682},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.11615860462188721},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.05919739603996277}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3546000.3546018","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3546000.3546018","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th International Conference on High Performance Compilation, Computing and Communications","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7047247076","display_name":null,"funder_award_id":"62002365","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2741392386","https://openalex.org/W2914721378","https://openalex.org/W3132185085","https://openalex.org/W3207203625","https://openalex.org/W4244254628"],"related_works":["https://openalex.org/W3215381467","https://openalex.org/W4301207796","https://openalex.org/W2915956107","https://openalex.org/W1992604140","https://openalex.org/W1990800384","https://openalex.org/W2766023593","https://openalex.org/W4386211790","https://openalex.org/W4388466106","https://openalex.org/W2463487949","https://openalex.org/W2053400496"],"abstract_inverted_index":{"Modern":[0],"x86-64":[1,26,36,60,77,100],"processors":[2,37],"have":[3],"strong":[4],"performance":[5,30,54,83,93,156],"due":[6],"to":[7,90,115,127,164],"long":[8,12],"vector":[9,13,125],"units.":[10],"Therefore":[11],"units":[14],"are":[15],"widely":[16],"used":[17],"in":[18,85,132],"CNN-like":[19],"neural":[20],"network":[21],"model":[22],"inference":[23,33,53],"on":[24,34,59,75,96,168],"modern":[25,35],"processors.":[27,61],"However":[28],"the":[29,42,47,56,67,76,81,92,97,117,129,143,155,158],"of":[31,44,49,69,94,103,157],"GNN":[32,52],"is":[38],"poor.":[39],"Unfortunately,":[40],"with":[41],"development":[43],"GNNs":[45],"and":[46,79,105,111,123,166,170],"increase":[48],"graph":[50],"datasets,":[51],"meets":[55],"serious":[57],"challenge":[58],"In":[62,88,134],"this":[63,86],"paper,":[64],"we":[65,107,136],"study":[66],"problem":[68],"poorly":[70],"optimized":[71],"DGL-based":[72],"GAT":[73],"models":[74],"platform,":[78],"analyze":[80],"main":[82,99],"bottlenecks":[84],"case.":[87],"order":[89],"optimize":[91,128],"DGL":[95,160],"two":[98],"platform":[101],"CPUs":[102],"Intel":[104,169],"AMD,":[106],"implement":[108],"a":[109],"simple":[110],"effective":[112],"task":[113,118],"allocator":[114],"balance":[116],"load":[119],"among":[120],"multiple":[121],"cores":[122],"use":[124],"instructions":[126],"core":[130],"operators":[131],"DGL.":[133],"addition,":[135],"also":[137],"propose":[138],"corresponding":[139],"optimization":[140,152],"ideas":[141],"for":[142],"NUMA":[144],"architecture.":[145],"The":[146],"experimental":[147],"results":[148],"show":[149],"that":[150],"our":[151],"method":[153],"improves":[154],"basic":[159],"version":[161],"by":[162],"up":[163],"3.12x":[165],"2.6x":[167],"AMD":[171],"platforms.":[172]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
