{"id":"https://openalex.org/W4284960270","doi":"https://doi.org/10.1145/3491418.3535136","title":"A Design Pattern for Recoverable Job Management","display_name":"A Design Pattern for Recoverable Job Management","publication_year":2022,"publication_date":"2022-07-08","ids":{"openalex":"https://openalex.org/W4284960270","doi":"https://doi.org/10.1145/3491418.3535136"},"language":"en","primary_location":{"id":"doi:10.1145/3491418.3535136","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3491418.3535136","pdf_url":null,"source":{"id":"https://openalex.org/S4306523034","display_name":"Practice and Experience in Advanced Research Computing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Practice and Experience in Advanced Research Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033086761","display_name":"Richard Cardone","orcid":"https://orcid.org/0000-0002-8668-6220"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Richard Cardone","raw_affiliation_strings":["Texas Advanced Computing Center, The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002759577","display_name":"Joe Stubbs","orcid":"https://orcid.org/0000-0002-8644-0300"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joe Stubbs","raw_affiliation_strings":["Texas Advanced Computing Center, The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069423477","display_name":"Steve Black","orcid":"https://orcid.org/0000-0002-9504-7981"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steve Black","raw_affiliation_strings":["Texas Advanced Computing Center, The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102324065","display_name":"Christian Garcia","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christian Garcia","raw_affiliation_strings":["Texas Advanced Computing Center, The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083751527","display_name":"Anagha Jamthe","orcid":"https://orcid.org/0000-0002-8744-2439"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anagha Jamthe","raw_affiliation_strings":["Texas Advanced Computing Center, The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090383040","display_name":"Mike Packard","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mike Packard","raw_affiliation_strings":["Texas Advanced Computing Center, The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112266309","display_name":"Smruti Padhy","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Smruti Padhy","raw_affiliation_strings":["Texas Advanced Computing Center, The University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, The University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5033086761"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":0.1381,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.45538421,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9334999918937683,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8010340929031372},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.7626240253448486},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7333899140357971},{"id":"https://openalex.org/keywords/middleware","display_name":"Middleware (distributed applications)","score":0.6675446033477783},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6513451337814331},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.6103228330612183},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.574062705039978},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4613032937049866},{"id":"https://openalex.org/keywords/service","display_name":"Service (business)","score":0.46066394448280334},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4288443624973297},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3543405830860138},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.32858413457870483},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.26095589995384216}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8010340929031372},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.7626240253448486},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7333899140357971},{"id":"https://openalex.org/C169468491","wikidata":"https://www.wikidata.org/wiki/Q146923","display_name":"Middleware (distributed applications)","level":2,"score":0.6675446033477783},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6513451337814331},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.6103228330612183},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.574062705039978},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4613032937049866},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.46066394448280334},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4288443624973297},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3543405830860138},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.32858413457870483},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.26095589995384216},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3491418.3535136","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3491418.3535136","pdf_url":null,"source":{"id":"https://openalex.org/S4306523034","display_name":"Practice and Experience in Advanced Research Computing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Practice and Experience in Advanced Research Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.5799999833106995}],"awards":[{"id":"https://openalex.org/G5306660993","display_name":null,"funder_award_id":"1931439,1931575","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W1649645444","https://openalex.org/W3156001951","https://openalex.org/W6679633611"],"related_works":["https://openalex.org/W986318368","https://openalex.org/W2000785801","https://openalex.org/W2384410913","https://openalex.org/W2352878646","https://openalex.org/W2990194547","https://openalex.org/W2004734601","https://openalex.org/W2130149817","https://openalex.org/W2547038763","https://openalex.org/W2610007503","https://openalex.org/W2149470664"],"abstract_inverted_index":{"Processing":[0],"scientific":[1],"workloads":[2],"involves":[3],"staging":[4],"inputs,":[5],"executing":[6],"and":[7,12,36,54,76],"monitoring":[8],"jobs,":[9],"archiving":[10],"outputs,":[11],"doing":[13],"all":[14],"of":[15,60,78],"this":[16,29],"in":[17,31,62,69],"a":[18],"secure,":[19],"repeatable":[20],"way.":[21],"Specialized":[22],"middleware":[23],"has":[24],"been":[25],"developed":[26],"to":[27,49],"automate":[28],"process":[30],"HPC,":[32],"HTC,":[33],"cloud,":[34],"Kubernetes":[35],"other":[37],"environments.":[38],"This":[39],"paper":[40],"describes":[41],"the":[42,63,74,79],"Job":[43],"Management":[44],"(JM)":[45],"design":[46],"pattern":[47],"used":[48],"enhance":[50],"workload":[51],"reliability,":[52],"scalability":[53],"recovery.":[55],"We":[56,71],"discuss":[57,73],"two":[58],"implementations":[59],"JM":[61],"Tapis":[64],"Jobs":[65],"service,":[66],"both":[67],"currently":[68],"production.":[70],"also":[72],"reliability":[75],"performance":[77],"system":[80],"under":[81],"load,":[82],"such":[83],"as":[84],"when":[85],"10,000":[86],"jobs":[87],"are":[88],"submitted":[89],"at":[90],"once.":[91]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
