{"id":"https://openalex.org/W3186309308","doi":"https://doi.org/10.1145/3437359.3465581","title":"A Heterogeneous MPI+PPL Task Scheduling Approach for Asynchronous Many-Task Runtime Systems","display_name":"A Heterogeneous MPI+PPL Task Scheduling Approach for Asynchronous Many-Task Runtime Systems","publication_year":2021,"publication_date":"2021-07-17","ids":{"openalex":"https://openalex.org/W3186309308","doi":"https://doi.org/10.1145/3437359.3465581","mag":"3186309308"},"language":"en","primary_location":{"id":"doi:10.1145/3437359.3465581","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3437359.3465581","pdf_url":null,"source":{"id":"https://openalex.org/S4306523034","display_name":"Practice and Experience in Advanced Research Computing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Practice and Experience in Advanced Research Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020115467","display_name":"John Holmen","orcid":"https://orcid.org/0000-0002-5934-2641"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"John Holmen","raw_affiliation_strings":["Scientific Computing and Imaging Institute, University of Utah, USA"],"affiliations":[{"raw_affiliation_string":"Scientific Computing and Imaging Institute, University of Utah, USA","institution_ids":["https://openalex.org/I223532165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072576754","display_name":"Damodar Sahasrabudhe","orcid":"https://orcid.org/0000-0003-0796-1966"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Damodar Sahasrabudhe","raw_affiliation_strings":["Scientific Computing and Imaging Institute, University of Utah, USA"],"affiliations":[{"raw_affiliation_string":"Scientific Computing and Imaging Institute, University of Utah, USA","institution_ids":["https://openalex.org/I223532165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018125253","display_name":"Martin Berzins","orcid":"https://orcid.org/0000-0002-5419-0634"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Martin Berzins","raw_affiliation_strings":["Scientific Computing and Imaging Institute, University of Utah, USA"],"affiliations":[{"raw_affiliation_string":"Scientific Computing and Imaging Institute, University of Utah, USA","institution_ids":["https://openalex.org/I223532165"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5020115467"],"corresponding_institution_ids":["https://openalex.org/I223532165"],"apc_list":null,"apc_paid":null,"fwci":0.9211,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.72246766,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8891245126724243},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.6606670618057251},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.6305826902389526},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6275708079338074},{"id":"https://openalex.org/keywords/runtime-system","display_name":"Runtime system","score":0.6264243721961975},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.551138699054718},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5217245817184448},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5185461640357971},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.496142715215683},{"id":"https://openalex.org/keywords/exascale-computing","display_name":"Exascale computing","score":0.4938374161720276},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.4703277051448822},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.4578704833984375},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4439120888710022},{"id":"https://openalex.org/keywords/task-parallelism","display_name":"Task parallelism","score":0.43733906745910645},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3905957043170929},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3195362091064453},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.16974645853042603},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.13421520590782166},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.09240713715553284}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8891245126724243},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.6606670618057251},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.6305826902389526},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6275708079338074},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.6264243721961975},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.551138699054718},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5217245817184448},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5185461640357971},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.496142715215683},{"id":"https://openalex.org/C2778837361","wikidata":"https://www.wikidata.org/wiki/Q2450880","display_name":"Exascale computing","level":3,"score":0.4938374161720276},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.4703277051448822},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.4578704833984375},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4439120888710022},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.43733906745910645},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3905957043170929},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3195362091064453},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.16974645853042603},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.13421520590782166},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.09240713715553284},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3437359.3465581","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3437359.3465581","pdf_url":null,"source":{"id":"https://openalex.org/S4306523034","display_name":"Practice and Experience in Advanced Research Computing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Practice and Experience in Advanced Research Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4699999988079071,"display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G3961479052","display_name":null,"funder_award_id":"UTA19-001215","funder_id":"https://openalex.org/F4320310620","funder_display_name":"University of Texas at Austin"}],"funders":[{"id":"https://openalex.org/F4320310620","display_name":"University of Texas at Austin","ror":"https://ror.org/00hj54h04"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W57243869","https://openalex.org/W765424690","https://openalex.org/W1569680480","https://openalex.org/W1975372360","https://openalex.org/W2036551003","https://openalex.org/W2078794610","https://openalex.org/W2087440962","https://openalex.org/W2121893797","https://openalex.org/W2149247365","https://openalex.org/W2296386193","https://openalex.org/W2506485539","https://openalex.org/W2542921230","https://openalex.org/W2740841959","https://openalex.org/W2794282368","https://openalex.org/W2807986049","https://openalex.org/W2905532857","https://openalex.org/W2912234501","https://openalex.org/W2914755809","https://openalex.org/W2915005184","https://openalex.org/W2994372971","https://openalex.org/W2998644365","https://openalex.org/W3036070480","https://openalex.org/W3094876077","https://openalex.org/W3114095374","https://openalex.org/W3209449653","https://openalex.org/W4230372634","https://openalex.org/W4240215573","https://openalex.org/W4300502652"],"related_works":["https://openalex.org/W2899708405","https://openalex.org/W1815597787","https://openalex.org/W2127811544","https://openalex.org/W2904187158","https://openalex.org/W2139119167","https://openalex.org/W2131296084","https://openalex.org/W2560584690","https://openalex.org/W1983500457","https://openalex.org/W1198893100","https://openalex.org/W3192695480"],"abstract_inverted_index":{"Asynchronous":[0],"many-task":[1,125],"runtime":[2,92,126],"systems":[3,97],"and":[4,22,111,164,178,193],"MPI+X":[5],"hybrid":[6],"parallelism":[7],"approaches":[8],"have":[9,54],"shown":[10,55,140,200],"promise":[11,56],"for":[12,31,43,57,71,79,93,131,141],"helping":[13,58],"manage":[14,59],"the":[15,94,112,119,165],"increasing":[16,34],"complexity":[17],"of":[18,37,148],"nodes":[19],"in":[20,118],"current":[21],"emerging":[23],"high":[24],"performance":[25,155],"computing":[26],"(HPC)":[27],"systems,":[28,39],"including":[29],"those":[30],"exascale.":[32],"The":[33],"architectural":[35],"diversity":[36],"these":[38,73],"however,":[40],"poses":[41],"challenges":[42,86],"runtimes":[44],"supporting":[45],"more":[46],"homogeneous":[47],"HPC":[48],"systems.":[49],"Performance":[50],"portability":[51],"layers":[52],"(PPL)":[53],"this":[60,162],"diversity.":[61],"This":[62,101],"paper":[63],"describes":[64],"a":[65,91,106,133,172],"heterogeneous":[66,96,107,183],"MPI+PPL":[67],"task":[68,109],"scheduling":[69],"approach":[70,102],"combining":[72],"promising":[74],"solutions":[75],"with":[76,128],"additional":[77,129],"consideration":[78,130],"parallel":[80,134],"third":[81,135],"party":[82,136],"libraries":[83],"facing":[84],"similar":[85],"to":[87,158,170,176,180,188],"help":[88],"prepare":[89],"such":[90],"diverse":[95],"accompanying":[98,113,166],"exascale":[99],"computing.":[100],"is":[103],"demonstrated":[104],"using":[105,161,201],"MPI+Kokkos":[108],"scheduler":[110,163],"portable":[114,167],"abstractions":[115,168],"[16]":[116,169],"implemented":[117],"Uintah":[120,150],"Computational":[121],"Framework,":[122],"an":[123],"asynchronous":[124],"system,":[127],"hypre,":[132],"library.":[137],"Results":[138],"are":[139,198],"two":[142],"challenging":[143],"problems":[144],"executing":[145],"workloads":[146],"representative":[147],"typical":[149],"applications.":[151],"These":[152],"results":[153],"show":[154],"improvements":[156],"up":[157],"4.4x":[159],"when":[160],"port":[171],"previously":[173],"MPI-Only":[174],"problem":[175],"Kokkos::OpenMP":[177],"Kokkos::CUDA":[179],"improve":[181],"complex":[182],"node":[184],"use.":[185],"Good":[186],"strong-scaling":[187],"1,024":[189],"NVIDIA":[190],"V100":[191],"GPUs":[192],"512":[194],"IBM":[195],"POWER9":[196],"processor":[197],"also":[199],"MPI+Kokkos::OpenMP+Kokkos::CUDA":[202],"at":[203],"scale.":[204]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
