{"id":"https://openalex.org/W3084498190","doi":"https://doi.org/10.1109/hpcs48598.2019.9188153","title":"rDLB: A Novel Approach for Robust Dynamic Load Balancing of Scientific Applications with Independent Tasks","display_name":"rDLB: A Novel Approach for Robust Dynamic Load Balancing of Scientific Applications with Independent Tasks","publication_year":2019,"publication_date":"2019-07-01","ids":{"openalex":"https://openalex.org/W3084498190","doi":"https://doi.org/10.1109/hpcs48598.2019.9188153","mag":"3084498190"},"language":"en","primary_location":{"id":"doi:10.1109/hpcs48598.2019.9188153","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcs48598.2019.9188153","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016313382","display_name":"Ali Mohammed","orcid":"https://orcid.org/0000-0002-8465-0398"},"institutions":[{"id":"https://openalex.org/I1850255","display_name":"University of Basel","ror":"https://ror.org/02s6k3f65","country_code":"CH","type":"education","lineage":["https://openalex.org/I1850255"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Ali Mohammed","raw_affiliation_strings":["Department of Mathematics and Computer Science, University of Basel, Switzerland"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Computer Science, University of Basel, Switzerland","institution_ids":["https://openalex.org/I1850255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020464041","display_name":"Aur\u00e9lien Cavelan","orcid":"https://orcid.org/0000-0002-1784-0730"},"institutions":[{"id":"https://openalex.org/I1850255","display_name":"University of Basel","ror":"https://ror.org/02s6k3f65","country_code":"CH","type":"education","lineage":["https://openalex.org/I1850255"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Aurelien Cavelan","raw_affiliation_strings":["Department of Mathematics and Computer Science, University of Basel, Switzerland"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Computer Science, University of Basel, Switzerland","institution_ids":["https://openalex.org/I1850255"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055373392","display_name":"Florina M. Ciorba","orcid":"https://orcid.org/0000-0002-2773-4499"},"institutions":[{"id":"https://openalex.org/I1850255","display_name":"University of Basel","ror":"https://ror.org/02s6k3f65","country_code":"CH","type":"education","lineage":["https://openalex.org/I1850255"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Florina M. Ciorba","raw_affiliation_strings":["Department of Mathematics and Computer Science, University of Basel, Switzerland"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Computer Science, University of Basel, Switzerland","institution_ids":["https://openalex.org/I1850255"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5016313382"],"corresponding_institution_ids":["https://openalex.org/I1850255"],"apc_list":null,"apc_paid":null,"fwci":0.1769,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.58865524,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":"21","issue":null,"first_page":"374","last_page":"381"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8335981965065002},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6975932121276855},{"id":"https://openalex.org/keywords/load-balancing","display_name":"Load balancing (electrical power)","score":0.611696183681488},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6065289378166199},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.5705806612968445},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.568405270576477},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5263113975524902},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.4155203700065613}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8335981965065002},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6975932121276855},{"id":"https://openalex.org/C138959212","wikidata":"https://www.wikidata.org/wiki/Q1806783","display_name":"Load balancing (electrical power)","level":3,"score":0.611696183681488},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6065289378166199},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.5705806612968445},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.568405270576477},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5263113975524902},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.4155203700065613},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpcs48598.2019.9188153","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcs48598.2019.9188153","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W170768260","https://openalex.org/W1541239844","https://openalex.org/W1560961201","https://openalex.org/W1571109258","https://openalex.org/W1986905947","https://openalex.org/W1993660990","https://openalex.org/W2016559894","https://openalex.org/W2030129803","https://openalex.org/W2062214652","https://openalex.org/W2082013839","https://openalex.org/W2089536264","https://openalex.org/W2097321380","https://openalex.org/W2105947650","https://openalex.org/W2106450765","https://openalex.org/W2121188877","https://openalex.org/W2143291413","https://openalex.org/W2149294210","https://openalex.org/W2156146566","https://openalex.org/W2581245178","https://openalex.org/W2606550271","https://openalex.org/W2795277579","https://openalex.org/W2889256712","https://openalex.org/W2889553510","https://openalex.org/W2901510811","https://openalex.org/W2964120424","https://openalex.org/W2966776354","https://openalex.org/W2977548488","https://openalex.org/W4242361160","https://openalex.org/W6634209395","https://openalex.org/W6753952404","https://openalex.org/W6768661084"],"related_works":["https://openalex.org/W2153096481","https://openalex.org/W2148616436","https://openalex.org/W2102525122","https://openalex.org/W4245282135","https://openalex.org/W4306316843","https://openalex.org/W2130594209","https://openalex.org/W2036953450","https://openalex.org/W4300955944","https://openalex.org/W2170004886","https://openalex.org/W2527822502"],"abstract_inverted_index":{"Parallel":[0],"scientific":[1,116],"applications":[2,24,117,178,222],"that":[3,48,66,75,157,177,198],"execute":[4],"on":[5,68,81,121],"high":[6],"performance":[7,200],"computing":[8],"(HPC)":[9],"systems":[10,123],"often":[11],"contain":[12],"large":[13,69],"and":[14,134,197],"computationally-intensive":[15],"parallel":[16],"loops.":[17],"The":[18,72,173],"independent":[19,26,119],"loop":[20],"iterations":[21],"of":[22,39,44,84,92,115,138,154,171,191,204,210,221,232],"such":[23,40],"represent":[25],"tasks.":[27],"Dynamic":[28],"toad":[29],"balancing":[30,108],"(DLB)":[31],"is":[32,144,227],"used":[33,51],"to":[34,52,88,183,194,213,237],"achieve":[35,53],"a":[36,103,159,208,230],"balanced":[37],"execution":[38],"applications.":[41],"However,":[42],"most":[43],"the":[45,82,90,112,163,169,189,195,202,214,219,238],"self-scheduling-based":[46,73],"techniques":[47,74],"are":[49,55],"typically":[50],"DLB":[54,149],"not":[56],"robust":[57,105,113],"against":[58,223],"component":[59],"(e.g.,":[60],"processors,":[61],"network)":[62],"failures":[63,77,125,139,187],"or":[64,140],"perturbations":[65,79,205,224],"arise":[67],"HPC":[70,122],"systems.":[71],"tolerate":[76,181],"and/or":[78,97,126],"rely":[80],"existence":[83],"fault-and/or":[85],"perturbation-detection":[86],"mechanisms":[87],"trigger":[89],"rescheduling":[91],"tasks":[93,120,133],"scheduled":[94],"onto":[95],"failed":[96],"perturbed":[98],"components.":[99],"This":[100],"work":[101],"proposes":[102],"novel":[104],"dynamic":[106],"load":[107],"(rDLB)":[109],"approach":[110],"for":[111,158],"self-scheduling":[114],"with":[118,168],"under":[124],"perturbations.":[127,141],"rDLB":[128,143,155,180,235],"proactively":[129],"reschedules":[130],"already":[131],"allocated":[132,193],"requires":[135],"no":[136],"detection":[137],"Moreover,":[142,218],"integrated":[145],"into":[146],"an":[147],"MPI-based":[148],"library.":[150],"An":[151],"analytical":[152],"modeling":[153],"shows":[156,176],"fixed":[160],"problem":[161],"size,":[162],"fault-tolerance":[164],"overhead":[165],"linearly":[166],"decreases":[167],"number":[170,190],"processors.":[172],"experimental":[174],"evaluation":[175],"using":[179,234],"up":[182],"P-l":[184],"worker":[185],"processor":[186],"(P-is":[188],"processors":[192],"application)":[196],"their":[199],"in":[201],"presence":[203],"improved":[206],"by":[207,229],"factor":[209,231],"7":[211],"compared":[212,236],"case":[215,239],"without":[216,240],"rDLB.":[217,241],"robustness":[220],"(i.e.,":[225],"flexibility)":[226],"boosted":[228],"30":[233]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
