{"id":"https://openalex.org/W4401408731","doi":"https://doi.org/10.1145/3673038.3673100","title":"Enabling Performance Observability for Heterogeneous HPC Workflows with SOMA","display_name":"Enabling Performance Observability for Heterogeneous HPC Workflows with SOMA","publication_year":2024,"publication_date":"2024-08-08","ids":{"openalex":"https://openalex.org/W4401408731","doi":"https://doi.org/10.1145/3673038.3673100"},"language":"en","primary_location":{"id":"doi:10.1145/3673038.3673100","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3673038.3673100","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3673038.3673100","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 53rd International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3673038.3673100","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009000154","display_name":"Dewi Yokelson","orcid":"https://orcid.org/0000-0003-1453-5906"},"institutions":[{"id":"https://openalex.org/I181233156","display_name":"University of Oregon","ror":"https://ror.org/0293rh119","country_code":"US","type":"education","lineage":["https://openalex.org/I181233156"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dewi Yokelson","raw_affiliation_strings":["University of Oregon, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Oregon, United States of America","institution_ids":["https://openalex.org/I181233156"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101964384","display_name":"Mikhail Titov","orcid":"https://orcid.org/0000-0003-2357-7382"},"institutions":[{"id":"https://openalex.org/I200870766","display_name":"Brookhaven National Laboratory","ror":"https://ror.org/02ex6cf31","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I200870766","https://openalex.org/I39565521","https://openalex.org/I4210142672"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mikhail Titov","raw_affiliation_strings":["Brookhaven National Laboratory, United States of America"],"affiliations":[{"raw_affiliation_string":"Brookhaven National Laboratory, United States of America","institution_ids":["https://openalex.org/I200870766"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072939925","display_name":"Srinivasan Ramesh","orcid":"https://orcid.org/0000-0002-1679-922X"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Srinivasan Ramesh","raw_affiliation_strings":["Nvidia, United States of America"],"affiliations":[{"raw_affiliation_string":"Nvidia, United States of America","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035703383","display_name":"Ozgur O. Kilic","orcid":"https://orcid.org/0000-0003-2129-408X"},"institutions":[{"id":"https://openalex.org/I200870766","display_name":"Brookhaven National Laboratory","ror":"https://ror.org/02ex6cf31","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I200870766","https://openalex.org/I39565521","https://openalex.org/I4210142672"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ozgur Kilic","raw_affiliation_strings":["Brookhaven National Laboratory, United States of America"],"affiliations":[{"raw_affiliation_string":"Brookhaven National Laboratory, United States of America","institution_ids":["https://openalex.org/I200870766"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009954932","display_name":"Matteo Turilli","orcid":"https://orcid.org/0000-0003-0527-1435"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matteo Turilli","raw_affiliation_strings":["Rutgers University, United States of America"],"affiliations":[{"raw_affiliation_string":"Rutgers University, United States of America","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038763846","display_name":"Shantenu Jha","orcid":"https://orcid.org/0000-0002-5040-026X"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shantenu Jha","raw_affiliation_strings":["Rutgers University, United States of America"],"affiliations":[{"raw_affiliation_string":"Rutgers University, United States of America","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029524538","display_name":"Allen D. Malony","orcid":null},"institutions":[{"id":"https://openalex.org/I181233156","display_name":"University of Oregon","ror":"https://ror.org/0293rh119","country_code":"US","type":"education","lineage":["https://openalex.org/I181233156"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Allen Malony","raw_affiliation_strings":["University of Oregon, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Oregon, United States of America","institution_ids":["https://openalex.org/I181233156"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5009000154"],"corresponding_institution_ids":["https://openalex.org/I181233156"],"apc_list":null,"apc_paid":null,"fwci":1.047,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.77919468,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"220","last_page":"230"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.9013179540634155},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8389186859130859},{"id":"https://openalex.org/keywords/observability","display_name":"Observability","score":0.7067534327507019},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6438048481941223},{"id":"https://openalex.org/keywords/workflow-management-system","display_name":"Workflow management system","score":0.498762845993042},{"id":"https://openalex.org/keywords/workflow-technology","display_name":"Workflow technology","score":0.45158371329307556},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.4402725398540497},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4350854158401489},{"id":"https://openalex.org/keywords/workflow-engine","display_name":"Workflow engine","score":0.4208087921142578},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.29893508553504944},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18853268027305603},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.0937809944152832}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.9013179540634155},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8389186859130859},{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.7067534327507019},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6438048481941223},{"id":"https://openalex.org/C140824633","wikidata":"https://www.wikidata.org/wiki/Q2808660","display_name":"Workflow management system","level":3,"score":0.498762845993042},{"id":"https://openalex.org/C19612761","wikidata":"https://www.wikidata.org/wiki/Q8034836","display_name":"Workflow technology","level":3,"score":0.45158371329307556},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.4402725398540497},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4350854158401489},{"id":"https://openalex.org/C188220564","wikidata":"https://www.wikidata.org/wiki/Q3325097","display_name":"Workflow engine","level":3,"score":0.4208087921142578},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.29893508553504944},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18853268027305603},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0937809944152832},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3673038.3673100","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3673038.3673100","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3673038.3673100","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 53rd International Conference on Parallel Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:osti.gov:2478423","is_oa":true,"landing_page_url":"https://www.osti.gov/servlets/purl/2478423","pdf_url":"https://www.osti.gov/biblio/2478423","source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"doi:10.1145/3673038.3673100","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3673038.3673100","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3673038.3673100","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 53rd International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3971516050","display_name":null,"funder_award_id":"DE-SC001270","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G5633341255","display_name":null,"funder_award_id":"No. DE-SC0012704","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6357584807","display_name":null,"funder_award_id":"SC0012704","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6551061839","display_name":null,"funder_award_id":"E-SC0012704","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7642226822","display_name":null,"funder_award_id":"DE-SC0012704","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7957492830","display_name":null,"funder_award_id":"Contract No. DE-SC0012704","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4401408731.pdf"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W2035562244","https://openalex.org/W2038924755","https://openalex.org/W2154983209","https://openalex.org/W2266789289","https://openalex.org/W2491198319","https://openalex.org/W2767194790","https://openalex.org/W2796177908","https://openalex.org/W2943761765","https://openalex.org/W2949221896","https://openalex.org/W2962978410","https://openalex.org/W2963104418","https://openalex.org/W2963919096","https://openalex.org/W2963921579","https://openalex.org/W2997905860","https://openalex.org/W2999268859","https://openalex.org/W3004004277","https://openalex.org/W3086484938","https://openalex.org/W3104388511","https://openalex.org/W3123205047","https://openalex.org/W3165027330","https://openalex.org/W3194513155","https://openalex.org/W4286307993","https://openalex.org/W4312715647","https://openalex.org/W4387006225","https://openalex.org/W4388561700","https://openalex.org/W4388581099","https://openalex.org/W4388581276","https://openalex.org/W4392613491","https://openalex.org/W4396813169","https://openalex.org/W4399296903"],"related_works":["https://openalex.org/W2379425324","https://openalex.org/W2373541195","https://openalex.org/W2376410910","https://openalex.org/W2091596831","https://openalex.org/W2365658322","https://openalex.org/W2352344200","https://openalex.org/W329008868","https://openalex.org/W1987485359","https://openalex.org/W2367975678","https://openalex.org/W2906819665"],"abstract_inverted_index":{"Heterogeneous":[0],"workflows":[1,158],"represent":[2],"a":[3,120,135,160,227],"promising":[4],"approach":[5],"for":[6,107,125],"overcoming":[7],"traditional":[8],"application":[9,181],"performance":[10,65,84,122,152,167,176,215],"limitations":[11],"and":[12,28,31,48,74,89,171,182,223,240],"to":[13,235],"accelerate":[14],"scientific":[15,37],"insight":[16],"on":[17],"high-performance":[18],"computing":[19],"(HPC)":[20],"platforms.":[21],"As":[22],"HPC":[23,127,200],"platforms":[24],"grow":[25],"in":[26,159,191],"size":[27],"complexity,":[29],"managing":[30],"optimizing":[32],"workflow":[33,43,80,102,112,166,183,201,225],"resources":[34],"while":[35],"maximizing":[36],"output":[38],"assumes":[39],"vital":[40],"importance.":[41],"Optimal":[42],"resource":[44,238],"allocation":[45,239],"requires":[46,168],"high-quality":[47],"timely":[49],"information":[50,92,230],"about":[51],"the":[52,55,58,61,64,67,75,79,96,101,105,108,116,130,151,180,214,218,224],"state":[53],"of":[54,60,66,78,98,111,118,155,165,175,217],"hardware":[56],"resources,":[57],"status":[59,77],"pending":[62],"tasks,":[63,220],"tasks":[68],"that":[69,87,140],"have":[70],"already":[71],"been":[72],"executed,":[73],"current":[76],"itself.":[81],"A":[82],"robust":[83],"observability":[85,123,164,193],"framework":[86,124],"captures":[88],"delivers":[90],"this":[91,205],"can":[93,149,211,231],"fundamentally":[94],"improve":[95],"quality":[97],"decision-making":[99],"within":[100,198],"system,":[103],"setting":[104],"stage":[106],"adaptive":[109],"execution":[110],"tasks.":[113],"We":[114],"propose":[115],"use":[117],"SOMA,":[119],"service-based":[121,141],"such":[126],"workflows.":[128],"With":[129],"RADICAL-Pilot":[131],"runtime":[132],"system":[133,221],"as":[134,195,226],"development":[136],"vehicle,":[137],"SOMA":[138,210],"demonstrates":[139],"architectures":[142],"coupled":[143],"with":[144],"an":[145,199],"appropriate":[146],"data":[147,177],"model":[148],"serve":[150],"monitoring":[153],"needs":[154],"large-scale":[156],"ensemble":[157],"low-overhead":[161],"fashion.":[162],"Effective":[163],"exporting,":[169],"storing,":[170],"analyzing":[172],"several":[173],"types":[174],"from":[178],"across":[179],"software":[184,202],"stacks.":[185],"Our":[186],"study":[187],"finds":[188],"significant":[189],"benefits":[190],"integrating":[192],"frameworks":[194],"first-class":[196],"citizens":[197],"stack.":[203],"In":[204],"paper,":[206],"we":[207],"demonstrate":[208],"how":[209],"simultaneously":[212],"observe":[213],"states":[216],"individual":[219],"hardware,":[222],"whole.":[228],"Such":[229],"then":[232],"be":[233],"employed":[234],"calculate":[236],"better":[237],"task":[241],"configuration.":[242]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
