{"id":"https://openalex.org/W2020653682","doi":"https://doi.org/10.1145/2600212.2600703","title":"Next generation job management systems for extreme-scale ensemble computing","display_name":"Next generation job management systems for extreme-scale ensemble computing","publication_year":2014,"publication_date":"2014-06-20","ids":{"openalex":"https://openalex.org/W2020653682","doi":"https://doi.org/10.1145/2600212.2600703","mag":"2020653682"},"language":"en","primary_location":{"id":"doi:10.1145/2600212.2600703","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2600212.2600703","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 23rd international symposium on High-performance parallel and distributed computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100705364","display_name":"Ke Wang","orcid":"https://orcid.org/0009-0004-6213-1847"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ke Wang","raw_affiliation_strings":["Illinois Institute of Technology, Chicago, IL, USA","[Illinois Institute of Technology, Chicago, IL, USA.]"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology, Chicago, IL, USA","institution_ids":["https://openalex.org/I180949307"]},{"raw_affiliation_string":"[Illinois Institute of Technology, Chicago, IL, USA.]","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060532041","display_name":"Xiaobing Zhou","orcid":"https://orcid.org/0000-0003-1983-0971"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaobing Zhou","raw_affiliation_strings":["Illinois Institute of Technology, Chicago, IL, USA","[Illinois Institute of Technology, Chicago, IL, USA.]"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology, Chicago, IL, USA","institution_ids":["https://openalex.org/I180949307"]},{"raw_affiliation_string":"[Illinois Institute of Technology, Chicago, IL, USA.]","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100353673","display_name":"Hao Chen","orcid":"https://orcid.org/0009-0001-6480-7976"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Chen","raw_affiliation_strings":["Illinois Institute of Technology, Chicago, IL, USA","[Illinois Institute of Technology, Chicago, IL, USA.]"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology, Chicago, IL, USA","institution_ids":["https://openalex.org/I180949307"]},{"raw_affiliation_string":"[Illinois Institute of Technology, Chicago, IL, USA.]","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078475557","display_name":"Michael Lang","orcid":"https://orcid.org/0000-0002-3498-6352"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Lang","raw_affiliation_strings":["Los Alamos National Laboratory, Los Alamos, NM, USA","Los Alamos National Laboratory,Los Alamos, NM, USA"],"affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory, Los Alamos, NM, USA","institution_ids":["https://openalex.org/I1343871089"]},{"raw_affiliation_string":"Los Alamos National Laboratory,Los Alamos, NM, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030599558","display_name":"Ioan Raicu","orcid":"https://orcid.org/0000-0002-5477-439X"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ioan Raicu","raw_affiliation_strings":["Illinois Institute of Technology, Chicago, IL, USA","[Illinois Institute of Technology, Chicago, IL, USA.]"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology, Chicago, IL, USA","institution_ids":["https://openalex.org/I180949307"]},{"raw_affiliation_string":"[Illinois Institute of Technology, Chicago, IL, USA.]","institution_ids":["https://openalex.org/I180949307"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100705364"],"corresponding_institution_ids":["https://openalex.org/I180949307"],"apc_list":null,"apc_paid":null,"fwci":18.6197,"has_fulltext":false,"cited_by_count":58,"citation_normalized_percentile":{"value":0.99448719,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"111","last_page":"114"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/job-scheduler","display_name":"Job scheduler","score":0.8281248211860657},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8190784454345703},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7660718560218811},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6915707588195801},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.5516347885131836},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.5176937580108643},{"id":"https://openalex.org/keywords/metadata-management","display_name":"Metadata management","score":0.48666125535964966},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4835376739501953},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.46462875604629517},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.45918864011764526},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4494363069534302},{"id":"https://openalex.org/keywords/processor-scheduling","display_name":"Processor scheduling","score":0.44098877906799316},{"id":"https://openalex.org/keywords/job-queue","display_name":"Job queue","score":0.4132125973701477},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.2944112718105316},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.24786114692687988},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2129535973072052},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.20637154579162598},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1579762101173401}],"concepts":[{"id":"https://openalex.org/C111873713","wikidata":"https://www.wikidata.org/wiki/Q1641413","display_name":"Job scheduler","level":3,"score":0.8281248211860657},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8190784454345703},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7660718560218811},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6915707588195801},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.5516347885131836},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.5176937580108643},{"id":"https://openalex.org/C2779489174","wikidata":"https://www.wikidata.org/wiki/Q6822246","display_name":"Metadata management","level":3,"score":0.48666125535964966},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4835376739501953},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.46462875604629517},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.45918864011764526},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4494363069534302},{"id":"https://openalex.org/C2984822820","wikidata":"https://www.wikidata.org/wiki/Q1123036","display_name":"Processor scheduling","level":3,"score":0.44098877906799316},{"id":"https://openalex.org/C171627638","wikidata":"https://www.wikidata.org/wiki/Q6206744","display_name":"Job queue","level":4,"score":0.4132125973701477},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.2944112718105316},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.24786114692687988},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2129535973072052},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.20637154579162598},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1579762101173401},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2600212.2600703","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2600212.2600703","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 23rd international symposium on High-performance parallel and distributed computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6299999952316284,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W47974043","https://openalex.org/W110825830","https://openalex.org/W114584783","https://openalex.org/W259012431","https://openalex.org/W1536870558","https://openalex.org/W1540179969","https://openalex.org/W1565868544","https://openalex.org/W1589918049","https://openalex.org/W1596936080","https://openalex.org/W1603731243","https://openalex.org/W1916850769","https://openalex.org/W1967541550","https://openalex.org/W2021876388","https://openalex.org/W2055120144","https://openalex.org/W2056441794","https://openalex.org/W2076449099","https://openalex.org/W2090575249","https://openalex.org/W2102849319","https://openalex.org/W2108841814","https://openalex.org/W2123130398","https://openalex.org/W2123134606","https://openalex.org/W2135377936","https://openalex.org/W2136623863","https://openalex.org/W2146381930","https://openalex.org/W2146917903","https://openalex.org/W2148317584","https://openalex.org/W2168067900","https://openalex.org/W2492440574","https://openalex.org/W2912355159","https://openalex.org/W4285719527","https://openalex.org/W6788151078"],"related_works":["https://openalex.org/W4287687414","https://openalex.org/W4389103060","https://openalex.org/W2401183800","https://openalex.org/W3074780027","https://openalex.org/W4391898412","https://openalex.org/W2735859668","https://openalex.org/W2927104664","https://openalex.org/W4385069207","https://openalex.org/W4319941049","https://openalex.org/W2418418119"],"abstract_inverted_index":{"With":[0],"the":[1,90,120],"exponential":[2],"growth":[3],"of":[4,68,77,102,122,130],"supercomputers":[5],"in":[6],"parallelism,":[7],"applications":[8],"are":[9],"growing":[10],"more":[11,48],"diverse,":[12],"including":[13],"traditional":[14],"large-scale":[15],"HPC":[16],"MPI":[17],"jobs,":[18],"and":[19,32,92],"ensemble":[20],"workloads":[21,37],"such":[22],"as":[23],"finer-grained":[24],"many-task":[25],"computing":[26],"(MTC)":[27],"applications.":[28],"Delivering":[29],"high":[30],"throughput":[31],"low":[33],"latency":[34],"for":[35],"both":[36],"requires":[38],"developing":[39],"a":[40,59,75],"distributed":[41,60,83,123],"job":[42,61,91,104],"management":[43],"system":[44],"that":[45],"is":[46,66,86],"magnitudes":[47],"scalable":[49],"than":[50],"today's":[51],"centralized":[52],"ones.":[53],"In":[54],"this":[55],"paper,":[56],"we":[57],"present":[58],"launch":[62],"prototype,":[63],"SLURM++,":[64],"which":[65],"comprised":[67],"multiple":[69],"controllers":[70],"with":[71,98,110],"each":[72],"one":[73],"managing":[74],"partition":[76],"SLURM":[78,99],"daemons,":[79],"while":[80],"ZHT":[81],"(a":[82],"key-value":[84],"store)":[85],"used":[87],"to":[88,107,128],"store":[89],"resource":[93],"metadata.":[94],"We":[95,117],"compared":[96],"SLURM++":[97],"using":[100],"micro-benchmarks":[101],"different":[103],"sizes":[105],"up":[106,127],"500":[108],"nodes,":[109],"excellent":[111],"results":[112],"showing":[113],"10X":[114],"higher":[115],"throughput.":[116],"also":[118],"studied":[119],"potential":[121],"scheduling":[124],"through":[125],"simulations":[126],"millions":[129],"nodes.":[131]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":22},{"year":2015,"cited_by_count":30},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
