{"id":"https://openalex.org/W4391053455","doi":"https://doi.org/10.1145/3635035.3635036","title":"Non-Blocking GPU-CPU Notifications to Enable More GPU-CPU Parallelism","display_name":"Non-Blocking GPU-CPU Notifications to Enable More GPU-CPU Parallelism","publication_year":2024,"publication_date":"2024-01-18","ids":{"openalex":"https://openalex.org/W4391053455","doi":"https://doi.org/10.1145/3635035.3635036"},"language":"en","primary_location":{"id":"doi:10.1145/3635035.3635036","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3635035.3635036","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on High Performance Computing in Asia-Pacific Region","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086176033","display_name":"Bengisu Elis","orcid":"https://orcid.org/0000-0002-0781-8206"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Bengisu Elis","raw_affiliation_strings":["Technical University of Munich, Germany"],"raw_orcid":"https://orcid.org/0000-0002-0781-8206","affiliations":[{"raw_affiliation_string":"Technical University of Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012779678","display_name":"Olga Pearce","orcid":"https://orcid.org/0000-0002-1904-9627"},"institutions":[{"id":"https://openalex.org/I1282311441","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282311441","https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210138311"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Olga Pearce","raw_affiliation_strings":["Lawrence Livermore National Laboratory, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-1904-9627","affiliations":[{"raw_affiliation_string":"Lawrence Livermore National Laboratory, United States of America","institution_ids":["https://openalex.org/I1282311441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109547820","display_name":"David Boehme","orcid":null},"institutions":[{"id":"https://openalex.org/I1282311441","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282311441","https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210138311"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Boehme","raw_affiliation_strings":["Lawrence Livermore National Laboratory, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-4159-1519","affiliations":[{"raw_affiliation_string":"Lawrence Livermore National Laboratory, United States of America","institution_ids":["https://openalex.org/I1282311441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079015680","display_name":"Jason Burmark","orcid":"https://orcid.org/0009-0005-3036-0108"},"institutions":[{"id":"https://openalex.org/I1282311441","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282311441","https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210138311"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Burmark","raw_affiliation_strings":["Lawrence Livermore National Laboratory, United States of America"],"raw_orcid":"https://orcid.org/0009-0005-3036-0108","affiliations":[{"raw_affiliation_string":"Lawrence Livermore National Laboratory, United States of America","institution_ids":["https://openalex.org/I1282311441"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045289712","display_name":"Martin Schulz","orcid":"https://orcid.org/0000-0001-9013-435X"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Martin Schulz","raw_affiliation_strings":["Technical University of Munich, Germany"],"raw_orcid":"https://orcid.org/0000-0001-9013-435X","affiliations":[{"raw_affiliation_string":"Technical University of Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5086176033"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":1.8083,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.8368336,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8807225227355957},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.7510762214660645},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6994272470474243},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.6460843682289124},{"id":"https://openalex.org/keywords/central-processing-unit","display_name":"Central processing unit","score":0.6249205470085144},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.6000086665153503},{"id":"https://openalex.org/keywords/host","display_name":"Host (biology)","score":0.4526606798171997},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.4130586087703705},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.26943129301071167},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.059018105268478394}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8807225227355957},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.7510762214660645},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6994272470474243},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.6460843682289124},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.6249205470085144},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.6000086665153503},{"id":"https://openalex.org/C126831891","wikidata":"https://www.wikidata.org/wiki/Q221673","display_name":"Host (biology)","level":2,"score":0.4526606798171997},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.4130586087703705},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.26943129301071167},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.059018105268478394},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3635035.3635036","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3635035.3635036","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on High Performance Computing in Asia-Pacific Region","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5816449448","display_name":null,"funder_award_id":"DE-AC52-07NA27344","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6351350647","display_name":null,"funder_award_id":"DE-AC52-07NA27344","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G8374966015","display_name":null,"funder_award_id":"AC52-07NA27344","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8531805188","display_name":"Plasma Exascale-Performance Simulations CoE - Pushing flagship plasma simulations codes to tackle exascale-enabled Grand Challenges via performance optimisation and codesign","funder_award_id":"101093261","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G991281326","display_name":null,"funder_award_id":"AC52-07NA27344","funder_id":"https://openalex.org/F4320338286","funder_display_name":"Lawrence Livermore National Laboratory"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"},{"id":"https://openalex.org/F4320338286","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2040561634","https://openalex.org/W2117859758","https://openalex.org/W2767210925","https://openalex.org/W2926767350","https://openalex.org/W3028661980","https://openalex.org/W3028670792","https://openalex.org/W3039595577","https://openalex.org/W3043110088","https://openalex.org/W3128575430","https://openalex.org/W3141650078","https://openalex.org/W3173778203","https://openalex.org/W3198396876","https://openalex.org/W3209568355","https://openalex.org/W4296888779","https://openalex.org/W4321636597"],"related_works":["https://openalex.org/W2392835431","https://openalex.org/W2126932387","https://openalex.org/W1965371215","https://openalex.org/W2353762239","https://openalex.org/W2185938410","https://openalex.org/W2484966135","https://openalex.org/W2015050211","https://openalex.org/W2045183646","https://openalex.org/W2108990487","https://openalex.org/W2162409446"],"abstract_inverted_index":{"GPUs":[0,12,20],"are":[1,10],"increasingly":[2],"popular":[3],"in":[4,34,61],"HPC":[5],"systems,":[6],"and":[7,25,37,58,77,80,88,126],"more":[8],"applications":[9],"adopting":[11],"each":[13],"day.":[14],"However,":[15],"the":[16,73,86,91,108,111,122,131,144,148],"control":[17,54,99],"synchronization":[18,55],"of":[19,56,75,90,110],"with":[21,63,81,96,147],"CPUs":[22],"is":[23],"suboptimal":[24],"only":[26],"possible":[27],"after":[28],"GPU":[29,65,78,127],"kernel":[30],"termination":[31],"points,":[32],"resulting":[33],"serialized":[35],"host":[36,59],"device":[38,57],"tasks.":[39],"In":[40],"this":[41,68],"paper,":[42],"we":[43,71,135],"propose":[44],"a":[45,140],"novel":[46],"CPU-GPU":[47],"notification":[48,69,93,117,150],"method":[49],"that":[50,82],"enables":[51],"non-blocking":[52],"in-kernel":[53,97],"tasks":[60],"combination":[62],"persistent":[64],"kernels.":[66],"Using":[67],"method,":[70,118],"increase":[72],"overlap":[74],"CPU":[76,125],"execution":[79,132],"parallelism.":[83],"We":[84,106],"present":[85],"concept":[87],"structure":[89],"proposed":[92],"mechanism":[94],"together":[95],"GPU-CPU":[98],"synchronization,":[100],"using":[101,114,139],"halo-exchange":[102,112,145],"as":[103,119,121],"an":[104],"example.":[105],"analyze":[107],"performance":[109,141],"pattern":[113,146],"our":[115,137],"new":[116,149],"well":[120],"interference":[123],"between":[124],"operations":[128],"due":[129],"to":[130],"overlap.":[133],"Finally,":[134],"verify":[136],"results":[138],"model":[142],"covering":[143],"method.":[151]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-01T08:46:32.239190","created_date":"2025-10-10T00:00:00"}
