{"id":"https://openalex.org/W4308091085","doi":"https://doi.org/10.1109/hpec55821.2022.9926390","title":"DASH: Scheduling Deep Learning Workloads on Multi-Generational GPU-Accelerated Clusters","display_name":"DASH: Scheduling Deep Learning Workloads on Multi-Generational GPU-Accelerated Clusters","publication_year":2022,"publication_date":"2022-09-19","ids":{"openalex":"https://openalex.org/W4308091085","doi":"https://doi.org/10.1109/hpec55821.2022.9926390"},"language":"en","primary_location":{"id":"doi:10.1109/hpec55821.2022.9926390","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec55821.2022.9926390","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100733204","display_name":"Baolin Li","orcid":"https://orcid.org/0000-0001-9778-1023"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":true,"raw_author_name":"Baolin Li","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007281535","display_name":"Tirthak Patel","orcid":"https://orcid.org/0000-0003-3127-5931"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Tirthak Patel","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043450560","display_name":"Vijay Gadepally","orcid":"https://orcid.org/0000-0002-4598-2808"},"institutions":[{"id":"https://openalex.org/I4210122954","display_name":"MIT Lincoln Laboratory","ror":"https://ror.org/022z6jk58","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210122954","https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vijay Gadepally","raw_affiliation_strings":["MIT Lincoln Laboratory"],"affiliations":[{"raw_affiliation_string":"MIT Lincoln Laboratory","institution_ids":["https://openalex.org/I4210122954"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023809975","display_name":"Karen Gettings","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122954","display_name":"MIT Lincoln Laboratory","ror":"https://ror.org/022z6jk58","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210122954","https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Karen Gettings","raw_affiliation_strings":["MIT Lincoln Laboratory"],"affiliations":[{"raw_affiliation_string":"MIT Lincoln Laboratory","institution_ids":["https://openalex.org/I4210122954"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103227438","display_name":"Siddharth Samsi","orcid":"https://orcid.org/0009-0000-2884-9688"},"institutions":[{"id":"https://openalex.org/I4210122954","display_name":"MIT Lincoln Laboratory","ror":"https://ror.org/022z6jk58","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210122954","https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siddharth Samsi","raw_affiliation_strings":["MIT Lincoln Laboratory"],"affiliations":[{"raw_affiliation_string":"MIT Lincoln Laboratory","institution_ids":["https://openalex.org/I4210122954"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074406596","display_name":"Devesh Tiwari","orcid":"https://orcid.org/0000-0002-7253-2458"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Devesh Tiwari","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100733204"],"corresponding_institution_ids":["https://openalex.org/I87182695"],"apc_list":null,"apc_paid":null,"fwci":0.303,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62324243,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dash","display_name":"Dash","score":0.8800577521324158},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8785326480865479},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.686482310295105},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.6660828590393066},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5768143534660339},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4743586778640747},{"id":"https://openalex.org/keywords/job-scheduler","display_name":"Job scheduler","score":0.4558310806751251},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.43496280908584595},{"id":"https://openalex.org/keywords/execution-time","display_name":"Execution time","score":0.4107578992843628},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31330856680870056},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.2929898798465729},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.20537057518959045},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.12718120217323303}],"concepts":[{"id":"https://openalex.org/C2776090536","wikidata":"https://www.wikidata.org/wiki/Q187819","display_name":"Dash","level":2,"score":0.8800577521324158},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8785326480865479},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.686482310295105},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.6660828590393066},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5768143534660339},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4743586778640747},{"id":"https://openalex.org/C111873713","wikidata":"https://www.wikidata.org/wiki/Q1641413","display_name":"Job scheduler","level":3,"score":0.4558310806751251},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.43496280908584595},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.4107578992843628},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31330856680870056},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.2929898798465729},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.20537057518959045},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.12718120217323303},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec55821.2022.9926390","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec55821.2022.9926390","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.4699999988079071}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W1686810756","https://openalex.org/W2054548832","https://openalex.org/W2141992894","https://openalex.org/W2153841541","https://openalex.org/W2194775991","https://openalex.org/W2525778437","https://openalex.org/W2612026221","https://openalex.org/W2734941459","https://openalex.org/W2765985261","https://openalex.org/W2769856846","https://openalex.org/W2798515322","https://openalex.org/W2884711234","https://openalex.org/W2896457183","https://openalex.org/W2901541570","https://openalex.org/W2913525628","https://openalex.org/W2916979304","https://openalex.org/W2929502194","https://openalex.org/W2937394206","https://openalex.org/W2955146308","https://openalex.org/W2963446712","https://openalex.org/W2963918968","https://openalex.org/W2971725207","https://openalex.org/W2972163809","https://openalex.org/W2973058717","https://openalex.org/W3006926841","https://openalex.org/W3016842236","https://openalex.org/W3022298203","https://openalex.org/W3022548332","https://openalex.org/W3043023836","https://openalex.org/W3104130891","https://openalex.org/W3211336323","https://openalex.org/W4200515842","https://openalex.org/W4255681033","https://openalex.org/W4287889735","https://openalex.org/W4288630254","https://openalex.org/W4289401659","https://openalex.org/W4293775467","https://openalex.org/W4297775537","https://openalex.org/W4385245566","https://openalex.org/W6637373629","https://openalex.org/W6677314723","https://openalex.org/W6727690538","https://openalex.org/W6737664043","https://openalex.org/W6739901393","https://openalex.org/W6746514494","https://openalex.org/W6752367771","https://openalex.org/W6753209298","https://openalex.org/W6755207826","https://openalex.org/W6756009870","https://openalex.org/W6756439839","https://openalex.org/W6757922489","https://openalex.org/W6758283263","https://openalex.org/W6759814162","https://openalex.org/W6763315368","https://openalex.org/W6765429308","https://openalex.org/W6765484274","https://openalex.org/W6772091339","https://openalex.org/W6782839094"],"related_works":["https://openalex.org/W2337195301","https://openalex.org/W2521283716","https://openalex.org/W2503642292","https://openalex.org/W2805154545","https://openalex.org/W1482678631","https://openalex.org/W1569389315","https://openalex.org/W162708450","https://openalex.org/W108745714","https://openalex.org/W1543313205","https://openalex.org/W2076965844"],"abstract_inverted_index":{"Two":[0],"notable":[1],"characteristics":[2,76],"of":[3,22,25,77],"modern":[4],"GPU-accelerated":[5],"HPC":[6],"clusters":[7],"are:":[8],"(1)":[9],"they":[10,20,28],"increasingly":[11],"run":[12],"deep":[13],"learning":[14],"(DL)":[15],"model-training":[16],"workloads,":[17,80],"and":[18,65,90],"(2)":[19],"consist":[21],"multiple":[23],"generations":[24],"GPUs,":[26],"i.e.,":[27],"are":[29],"heterogeneous.":[30],"However,":[31],"existing":[32],"works":[33],"in":[34,68],"GPU":[35,45,52,66,71],"cluster":[36,53],"scheduling":[37],"for":[38],"DL":[39,63,79],"workloads":[40,64],"have":[41],"not":[42],"addressed":[43],"the":[44,84,91,101],"multi-generation":[46],"problem.":[47],"We":[48],"propose":[49],"DASH,":[50],"a":[51,59,69],"scheduler":[54],"designed":[55],"to":[56,100],"optimally":[57],"make":[58],"match":[60],"between":[61],"different":[62],"types":[67],"multi-generational":[70],"environment.":[72],"By":[73],"lever-aging":[74],"execution":[75],"co-scheduled":[78],"DASH":[81],"can":[82],"improve":[83],"average":[85,92],"job":[86,93,104],"runtime":[87],"by":[88,96],"17%":[89],"completion":[94],"time":[95],"14":[97],"%":[98],"compared":[99],"traditional":[102],"heterogeneity-unaware":[103],"scheduler.":[105]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
