{"id":"https://openalex.org/W6911094960","doi":"https://doi.org/10.5075/epfl-thesis-8995","title":"System Support for Robust Distributed Learning","display_name":"System Support for Robust Distributed Learning","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W6911094960","doi":"https://doi.org/10.5075/epfl-thesis-8995"},"language":"en","primary_location":{"id":"pmh:oai:infoscience.epfl.ch:294821","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/188775","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"doctoral thesis"},"type":"dissertation","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://infoscience.epfl.ch/handle/20.500.14299/188775","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Guirguis, Arsany Hany Abdelmessih","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Guirguis, Arsany Hany Abdelmessih","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.4772000014781952,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.4772000014781952,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.0851999968290329,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.05260000005364418,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.718500018119812},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6611999869346619},{"id":"https://openalex.org/keywords/elasticity","display_name":"Elasticity (physics)","score":0.5436999797821045},{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.49939998984336853},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4675999879837036},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.45080000162124634},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.427700012922287},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.4244999885559082},{"id":"https://openalex.org/keywords/federated-learning","display_name":"Federated learning","score":0.4138000011444092},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.40540000796318054},{"id":"https://openalex.org/keywords/pace","display_name":"Pace","score":0.399399995803833}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8187000155448914},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.7347000241279602},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.718500018119812},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6611999869346619},{"id":"https://openalex.org/C121854251","wikidata":"https://www.wikidata.org/wiki/Q62932","display_name":"Elasticity (physics)","level":2,"score":0.5436999797821045},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.49939998984336853},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4675999879837036},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.45080000162124634},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.427700012922287},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.4244999885559082},{"id":"https://openalex.org/C2992525071","wikidata":"https://www.wikidata.org/wiki/Q50818671","display_name":"Federated learning","level":2,"score":0.4138000011444092},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.40540000796318054},{"id":"https://openalex.org/C2777526511","wikidata":"https://www.wikidata.org/wiki/Q691543","display_name":"Pace","level":2,"score":0.399399995803833},{"id":"https://openalex.org/C2779582901","wikidata":"https://www.wikidata.org/wiki/Q21013010","display_name":"Distributed learning","level":2,"score":0.37389999628067017},{"id":"https://openalex.org/C138959212","wikidata":"https://www.wikidata.org/wiki/Q1806783","display_name":"Load balancing (electrical power)","level":3,"score":0.36550000309944153},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.358599990606308},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3513000011444092},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3458000123500824},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.34529998898506165},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3334999978542328},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32190001010894775},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.31049999594688416},{"id":"https://openalex.org/C193415008","wikidata":"https://www.wikidata.org/wiki/Q639681","display_name":"Network architecture","level":2,"score":0.3093000054359436},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.2906000018119812},{"id":"https://openalex.org/C3739613","wikidata":"https://www.wikidata.org/wiki/Q679003","display_name":"Distributed Computing Environment","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C2779370713","wikidata":"https://www.wikidata.org/wiki/Q357554","display_name":"Load management","level":2,"score":0.2858999967575073},{"id":"https://openalex.org/C66898733","wikidata":"https://www.wikidata.org/wiki/Q5428126","display_name":"Fabric computing","level":5,"score":0.28349998593330383},{"id":"https://openalex.org/C130120984","wikidata":"https://www.wikidata.org/wiki/Q2835898","display_name":"Distributed algorithm","level":2,"score":0.28290000557899475},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C123201435","wikidata":"https://www.wikidata.org/wiki/Q456632","display_name":"Information privacy","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C70061542","wikidata":"https://www.wikidata.org/wiki/Q989016","display_name":"Distributed database","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C3017813396","wikidata":"https://www.wikidata.org/wiki/Q17078173","display_name":"Resource constraints","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.25220000743865967},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.24279999732971191},{"id":"https://openalex.org/C513985346","wikidata":"https://www.wikidata.org/wiki/Q270471","display_name":"Virtualization","level":3,"score":0.23999999463558197},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.23749999701976776},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.2222999930381775}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:infoscience.epfl.ch:294821","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/188775","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"doctoral thesis"},{"id":"doi:10.5075/epfl-thesis-8995","is_oa":true,"landing_page_url":"https://doi.org/10.5075/epfl-thesis-8995","pdf_url":null,"source":{"id":"https://openalex.org/S4306400488","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"thesis"}],"best_oa_location":{"id":"pmh:oai:infoscience.epfl.ch:294821","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/188775","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"doctoral thesis"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Machine":[0],"learning":[1,326,395],"(ML)":[2],"applications":[3,72,97,183,328],"are":[4],"ubiquitous.":[5],"They":[6],"run":[7,75],"in":[8,67,73,107,219,244,254,297,329,392],"different":[9,220],"environments":[10],"such":[11,368],"as":[12],"datacenters,":[13],"the":[14,29,45,91,108,116,128,133,138,151,187,190,233,240,271,281,287,301,306,320,330,342,345,355,359,365,393,417,425],"cloud,":[15],"and":[16,140,156,206,211,224,274,344,385,410,420,431,437],"even":[17],"on":[18,76,145,413],"edge":[19,146],"devices.":[20],"Despite":[21],"where":[22],"they":[23],"run,":[24],"distributing":[25,38],"ML":[26,39,49,52,182,196],"training":[27,53,71,106,134,144,230,234,302,307,324,435],"seems":[28],"only":[30],"way":[31],"to":[32,44,55,64,177,216,237,268,285,312,318,354,378,383],"attain":[33],"scalable,":[34],"high-quality":[35],"learning.":[36],"But,":[37],"is":[40],"challenging,":[41],"essentially":[42],"due":[43,63],"unique":[46],"nature":[47],"of":[48,79,95,119,143,153,189,289,323,404],"applications.":[50,69],"First,":[51],"needs":[54],"be":[56],"robust":[57,162,203],"against":[58],"arbitrary":[59],"(i.e.,":[60,209,231,407],"Byzantine)":[61],"failures":[62],"its":[65],"usage":[66],"mission-critical":[68],"Second,":[70],"datacenters":[74],"shared":[77,246],"clusters":[78],"computing":[80,120],"resources,":[81],"for":[82,161,338],"which":[83,251],"we":[84,123,172,400],"need":[85,124],"resource":[86,241],"allocation":[87,242],"solutions":[88,125,163],"that":[89,126,164,294,340,369],"meet":[90],"high":[92],"computation":[93],"demands":[94],"these":[96,167],"while":[98,351],"fully":[99],"utilizing":[100],"existing":[101],"resources.":[102],"Third,":[103],"running":[104,272],"distributed":[105],"cloud":[109,314],"faces":[110],"a":[111,175,335,405,408,411,422],"network":[112,222,321,360],"bottleneck,":[113],"exacerbated":[114],"by":[115,194,263,362],"fast-growing":[117],"pace":[118],"power.":[121],"Hence,":[122],"reduce":[127],"communication":[129,201],"load":[130],"without":[131,304],"impacting":[132,305],"accuracy.":[135,308],"Fourth,":[136],"despite":[137],"scalability":[139],"privacy":[141],"guarantees":[142],"devices":[147],"via":[148],"federated":[149,394],"learning,":[150],"heterogeneity":[152],"devices'":[154,426],"capabilities":[155],"their":[157,181],"data":[158,225],"distributions":[159],"calls":[160],"cope":[165],"with":[166,397],"challenges.":[168],"To":[169],"achieve":[170],"robustness,":[171],"introduce":[173],"Garfield,":[174],"library":[176],"help":[178],"practitioners":[179],"make":[180],"Byzantine-resilient.":[184],"Besides":[185],"addressing":[186],"vulnerability":[188],"shared-graph":[191],"architecture":[192],"followed":[193],"classical":[195],"frameworks,":[197],"Garfield":[198,218],"supports":[199],"various":[200],"patterns,":[202],"aggregation":[204],"rules,":[205],"compute":[207,343],"hardware":[208],"CPUs":[210],"GPUs).":[212],"We":[213,227,248,292,309,332,380],"show":[214,310,381],"how":[215,311,382],"use":[217,313],"architectures,":[221],"settings,":[223],"distributions.":[226],"explore":[228],"elastic":[229],"changing":[232],"parameters":[235],"mid-execution)":[236],"efficiently":[238,384],"solve":[239],"problem":[243],"datacenters'":[245],"clusters.":[247],"present":[249],"ERA,":[250],"provides":[252],"elasticity":[253],"two":[255],"dimensions:":[256],"(1)":[257],"it":[258,276],"scales":[259],"jobs":[260],"horizontally,":[261],"i.e.,":[262],"adding":[264],"or":[265,269,374],"removing":[266],"resources":[267],"from":[270],"jobs,":[273],"(2)":[275],"dynamically":[277],"changes,":[278],"at":[279],"will,":[280],"per-GPU":[282],"batch":[283],"size":[284],"control":[286],"utilization":[288],"each":[290,414],"GPU.":[291],"demonstrate":[293],"simultaneous":[295],"scaling":[296,418],"both":[298,402],"dimensions":[299],"improves":[300],"time":[303],"object":[315],"stores":[316],"(COS)":[317],"alleviate":[319],"bottleneck":[322,361],"transfer":[325],"(TL)":[327],"cloud.":[331],"propose":[333],"HAPI,":[334],"processing":[336],"system":[337],"TL":[339,366],"spans":[341],"COS":[346],"tiers,":[347],"enabling":[348],"significant":[349],"improvements":[350],"remaining":[352],"transparent":[353],"user.":[356],"HAPI":[357],"mitigates":[358],"carefully":[363],"splitting":[364],"application":[367],"feature":[370],"extraction":[371],"is,":[372],"partially":[373],"entirely,":[375],"executed":[376],"next":[377],"storage.":[379],"robustly":[386],"train":[387],"generative":[388],"adversarial":[389],"networks":[390],"(GANs)":[391],"paradigm":[396],"FeGAN.":[398],"Essentially,":[399],"co-locate":[401],"components":[403],"GAN":[406],"generator":[409],"discriminator)":[412],"device":[415],"(addressing":[416],"problem)":[419],"have":[421],"server":[423],"aggregate":[424],"models":[427],"using":[428],"balanced":[429],"sampling":[430],"Kullback-Leibler":[432],"weighting,":[433],"mitigating":[434],"issues":[436],"boosting":[438],"convergence.":[439]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
