{"id":"https://openalex.org/W4388581266","doi":"https://doi.org/10.1145/3624062.3624201","title":"A Reinforcement Learning Based Backfilling Strategy for HPC Batch Jobs","display_name":"A Reinforcement Learning Based Backfilling Strategy for HPC Batch Jobs","publication_year":2023,"publication_date":"2023-11-10","ids":{"openalex":"https://openalex.org/W4388581266","doi":"https://doi.org/10.1145/3624062.3624201"},"language":"en","primary_location":{"id":"doi:10.1145/3624062.3624201","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3624062.3624201","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2404.09264","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093240766","display_name":"Elliot Kolker-Hicks","orcid":"https://orcid.org/0009-0007-1366-9028"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elliot Kolker-Hicks","raw_affiliation_strings":["University of North Carolina at Charlotte, United States of America"],"raw_orcid":"https://orcid.org/0009-0007-1366-9028","affiliations":[{"raw_affiliation_string":"University of North Carolina at Charlotte, United States of America","institution_ids":["https://openalex.org/I102149020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001652887","display_name":"Di Zhang","orcid":"https://orcid.org/0009-0005-3115-0276"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Di Zhang","raw_affiliation_strings":["University of North Carolina at Charlotte, United States of America"],"raw_orcid":"https://orcid.org/0009-0005-3115-0276","affiliations":[{"raw_affiliation_string":"University of North Carolina at Charlotte, United States of America","institution_ids":["https://openalex.org/I102149020"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012002926","display_name":"Dong Dai","orcid":"https://orcid.org/0000-0003-4078-8149"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Dai","raw_affiliation_strings":["University of North Carolina, Charlotte, United States of America"],"raw_orcid":"https://orcid.org/0000-0003-4078-8149","affiliations":[{"raw_affiliation_string":"University of North Carolina, Charlotte, United States of America","institution_ids":["https://openalex.org/I102149020"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.6552,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.90697312,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1316","last_page":"1323"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10551","display_name":"Scheduling and Optimization Algorithms","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.71897953748703},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6644514203071594},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6315814256668091},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3196587562561035},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1773967444896698},{"id":"https://openalex.org/keywords/structural-engineering","display_name":"Structural engineering","score":0.14902755618095398}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.71897953748703},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6644514203071594},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6315814256668091},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3196587562561035},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1773967444896698},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.14902755618095398}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3624062.3624201","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3624062.3624201","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2404.09264","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.09264","pdf_url":"https://arxiv.org/pdf/2404.09264","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2404.09264","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.09264","pdf_url":"https://arxiv.org/pdf/2404.09264","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.5699999928474426,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[{"id":"https://openalex.org/G2164527530","display_name":null,"funder_award_id":"1908843","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5798714714","display_name":null,"funder_award_id":"CCF-1908843","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6650200984","display_name":null,"funder_award_id":"CNS-2008265","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8017972125","display_name":null,"funder_award_id":"2008265","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4388581266.pdf","grobid_xml":"https://content.openalex.org/works/W4388581266.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W59652542","https://openalex.org/W1510274974","https://openalex.org/W1563494680","https://openalex.org/W2053341340","https://openalex.org/W2112168774","https://openalex.org/W2118449103","https://openalex.org/W2139250332","https://openalex.org/W2147598193","https://openalex.org/W2756568380","https://openalex.org/W2767921504","https://openalex.org/W2952046647","https://openalex.org/W2966274836","https://openalex.org/W3129362935","https://openalex.org/W4231792135","https://openalex.org/W4255654664","https://openalex.org/W4283367935","https://openalex.org/W4292688220"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588"],"abstract_inverted_index":{"High":[0],"Performance":[1],"Computing":[2],"(HPC)":[3],"systems":[4,20,57],"are":[5,67,76],"used":[6],"across":[7],"a":[8,29,52,60,108,130,135,193],"wide":[9],"range":[10],"of":[11,25,98,114,183],"disciplines":[12],"for":[13,78],"both":[14],"large":[15],"and":[16,102,122,141,174,237],"complex":[17],"computations.":[18],"HPC":[19,55],"often":[21,58],"receive":[22],"many":[23],"thousands":[24],"computational":[26],"tasks":[27],"at":[28],"time,":[30],"colloquially":[31],"referred":[32],"to":[33,70,93,119,147,218,230],"as":[34,42,44],"\u201cjobs\u201d.":[35],"These":[36],"jobs":[37,66,101],"must":[38],"then":[39],"be":[40,49,158],"scheduled":[41,68],"optimally":[43],"possible":[45],"so":[46],"they":[47],"can":[48,157,202],"completed":[50],"within":[51],"reasonable":[53],"timeframe.":[54],"scheduling":[56,221],"employ":[59],"technique":[61],"called":[62],"\u201cbackfilling\u201d,":[63],"wherein":[64],"low-priority":[65],"earlier":[69],"use":[71],"the":[72,79,95,99,149,172,181,245],"available":[73],"resources":[74],"that":[75,111],"waiting":[77],"pending":[80],"high-priority":[81],"jobs.":[82],"To":[83,144],"make":[84],"it":[85],"work,":[86],"backfilling":[87,121,142,196,205,232],"largely":[88],"relies":[89,163],"on":[90,164,180,187,209,224],"job":[91,115,211,227,235,248,252],"runtime":[92,116,236,249],"calculate":[94],"start":[96],"time":[97],"ready-to-schedule":[100],"avoid":[103],"delaying":[104],"them.":[105],"It":[106],"is":[107,134],"common":[109],"belief":[110],"better":[112,120,220,239],"estimations":[113],"will":[117],"lead":[118],"more":[123],"effective":[124,204],"scheduling.":[125],"However,":[126],"our":[127],"experiments":[128],"show":[129,199,216],"different":[131],"conclusion:":[132],"there":[133],"missing":[136],"trade-off":[137],"between":[138],"prediction":[139],"accuracy":[140],"opportunities.":[143],"learn":[145,203],"how":[146,200],"achieve":[148],"best":[150],"trade-off,":[151],"we":[152,190],"believe":[153],"reinforcement":[154,194],"learning":[155],"(RL)":[156],"effectively":[159],"leveraged.":[160],"Reinforcement":[161],"Learning":[162],"an":[165],"\u201cagent\u201d":[166],"which":[167],"makes":[168],"decisions":[169],"from":[170],"observing":[171],"environment,":[173],"gains":[175],"rewards":[176],"or":[177],"punishments":[178],"based":[179],"quality":[182],"its":[184],"decision-making.":[185],"Based":[186],"this":[188],"idea,":[189],"designed":[191],"RLBackfilling,":[192],"learning-based":[195],"algorithm.":[197],"We":[198],"RLBackfilling":[201],"strategies":[206],"via":[207],"trial-and-error":[208],"existing":[210],"traces.":[212],"Our":[213],"evaluation":[214],"results":[215],"up":[217],"17x":[219],"performance":[222,240],"(based":[223],"average":[225],"bounded":[226],"slowdown)":[228],"compared":[229,241],"EASY":[231,243],"using":[233,244],"user-provided":[234],"4.7x":[238],"with":[242],"ideal":[246],"predicted":[247],"(the":[250],"actual":[251],"runtime).":[253]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
