{"id":"https://openalex.org/W7116405257","doi":"https://doi.org/10.1145/3754598.3754652","title":"IRIS-MASH: Efficient Multi-device Asynchronous Multi-Stream Heterogeneous Computing","display_name":"IRIS-MASH: Efficient Multi-device Asynchronous Multi-Stream Heterogeneous Computing","publication_year":2025,"publication_date":"2025-09-08","ids":{"openalex":"https://openalex.org/W7116405257","doi":"https://doi.org/10.1145/3754598.3754652"},"language":null,"primary_location":{"id":"doi:10.1145/3754598.3754652","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3754598.3754652","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3754598.3754652","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031971317","display_name":"Narasinga Rao Miniskar","orcid":"https://orcid.org/0000-0001-8259-8891"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Narasinga Rao Miniskar","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0001-8259-8891","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025843559","display_name":"Aaron R. Young","orcid":"https://orcid.org/0000-0002-9301-8757"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aaron R. Young","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0002-5448-4667","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004329088","display_name":"Mohammad Alaul Haque Monil","orcid":"https://orcid.org/0000-0003-3419-4037"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohammad Alaul Haque Monil","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0003-3419-4037","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014340343","display_name":"Kazi Asifuzzaman","orcid":"https://orcid.org/0000-0002-4004-4791"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kazi Asifuzzaman","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0002-4004-4791","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074912565","display_name":"Beau Johnston","orcid":"https://orcid.org/0000-0001-5426-1415"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Beau Johnston","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0001-5426-1415","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019745966","display_name":"Keita Teranishi","orcid":"https://orcid.org/0000-0001-6647-2690"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Keita Teranishi","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0001-6647-2690","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5120880202","display_name":"Jeffrey S. Vetter","orcid":null},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeffrey S. Vetter","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0002-2449-6720","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5031971317"],"corresponding_institution_ids":["https://openalex.org/I1289243028"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.64060258,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"764","last_page":"773"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9413999915122986,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9413999915122986,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.010999999940395355,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.007899999618530273,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.8159000277519226},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.659500002861023},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5379999876022339},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5217000246047974},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.5169000029563904},{"id":"https://openalex.org/keywords/heterogeneous-network","display_name":"Heterogeneous network","score":0.40149998664855957}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8479999899864197},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.8159000277519226},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.659500002861023},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6284000277519226},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5379999876022339},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5217000246047974},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.5169000029563904},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.40560001134872437},{"id":"https://openalex.org/C158207573","wikidata":"https://www.wikidata.org/wiki/Q5747224","display_name":"Heterogeneous network","level":4,"score":0.40149998664855957},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.37720000743865967},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.36980000138282776},{"id":"https://openalex.org/C150762246","wikidata":"https://www.wikidata.org/wiki/Q4354073","display_name":"Reactive programming","level":4,"score":0.34119999408721924},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.33899998664855957},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.32679998874664307},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2874999940395355},{"id":"https://openalex.org/C66882249","wikidata":"https://www.wikidata.org/wiki/Q169336","display_name":"Homogeneous","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2615000009536743}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3754598.3754652","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3754598.3754652","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3754598.3754652","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3754598.3754652","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W111941793","https://openalex.org/W1984848758","https://openalex.org/W1988888548","https://openalex.org/W2072687243","https://openalex.org/W2087440962","https://openalex.org/W2121893797","https://openalex.org/W2142677441","https://openalex.org/W2168612748","https://openalex.org/W2965891118","https://openalex.org/W3160498990","https://openalex.org/W4200300530","https://openalex.org/W4205627056","https://openalex.org/W4248806677","https://openalex.org/W4312303242","https://openalex.org/W4316923778","https://openalex.org/W4318970015","https://openalex.org/W4367147549","https://openalex.org/W4383219947","https://openalex.org/W4386709682","https://openalex.org/W4388561315","https://openalex.org/W4388561326","https://openalex.org/W4388581050","https://openalex.org/W4390188487","https://openalex.org/W4401017730"],"related_works":[],"abstract_inverted_index":{"In":[0],"the":[1,115,130,159,222],"rapidly":[2],"evolving":[3],"field":[4],"of":[5,40,117,133,154,208,224],"high-performance":[6],"computing":[7,42],"(HPC),":[8],"effectively":[9,100],"leveraging":[10],"heterogeneous":[11,41,77,92,142,160,229],"devices":[12,69,78,93],"through":[13,122],"asynchronous":[14,24,61,98,108,184,225],"task":[15,25,54,62,109,120,226],"programming":[16,26,55,72,104,227],"is":[17,79],"paramount.":[18],"This":[19,85,202],"paper":[20],"presents":[21,87],"a":[22,30,37,141,152,171,187,213],"robust":[23],"model":[27,112],"tailored":[28],"for":[29,76,216],"multi-device,":[31],"multi-stream":[32],"execution":[33,132],"environment":[34],"that":[35],"incorporates":[36],"diverse":[38],"array":[39],"units,":[43],"including":[44],"GPUs":[45],"from":[46,158,175],"various":[47],"vendors":[48],"and":[49,128,147,186,200,211,219],"other":[50],"accelerators.":[51],"Current":[52],"state-of-the-art":[53],"models":[56],"provide":[57],"methodologies":[58],"to":[59,81,94,178,193],"support":[60,75],"executions,":[63],"but":[64],"they":[65],"typically":[66],"handle":[67],"homogeneous":[68],"using":[70,101,151],"native":[71,103],"languages,":[73],"while":[74],"limited":[80],"frameworks":[82],"like":[83],"OpenCL.":[84],"gap":[86],"significant":[88],"challenges":[89],"in":[90,221,228],"abstracting":[91],"harness":[95],"their":[96,102],"true":[97],"capabilities":[99],"languages.":[105],"By":[106],"implementing":[107],"execution,":[110],"our":[111],"significantly":[113,204],"boosts":[114],"performance":[116,150,172,190],"tiled":[118,155],"algorithm":[119,156],"graphs":[121],"overlapping":[123],"data":[124],"transfers":[125],"with":[126],"computation":[127,206],"enabling":[129],"simultaneous":[131],"multiple":[134],"kernels.":[135],"We":[136],"integrate":[137],"this":[138],"approach":[139,203],"into":[140],"Intelligent":[143],"Runtime":[144],"System":[145],"(IRIS)":[146],"assess":[148],"its":[149],"suite":[153],"benchmarks":[157],"math":[161],"kernels":[162],"library":[163],"(MatRIS)":[164],"based":[165],"on":[166],"IRIS.":[167],"Experimental":[168],"results":[169],"demonstrate":[170],"improvement":[173],"ranging":[174],"1.6":[176],"\u00d7":[177,180],"2":[179],"over":[181],"IRIS":[182],"without":[183],"support,":[185],"notable":[188],"22%":[189],"enhancement":[191],"compared":[192],"established":[194],"runtime":[195],"systems":[196],"such":[197],"as":[198],"StarPU":[199],"PaRSEC.":[201],"improves":[205],"efficiency":[207],"HPC":[209],"workflows":[210],"provides":[212],"solid":[214],"base":[215],"future":[217],"exploration":[218],"development":[220],"area":[223],"systems.":[230]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-12-21T00:00:00"}
