{"id":"https://openalex.org/W3121702752","doi":"https://doi.org/10.1109/tc.2021.3054656","title":"\u03bb<i>DNN</i>: Achieving Predictable Distributed DNN Training With Serverless Architectures","display_name":"\u03bb<i>DNN</i>: Achieving Predictable Distributed DNN Training With Serverless Architectures","publication_year":2021,"publication_date":"2021-01-26","ids":{"openalex":"https://openalex.org/W3121702752","doi":"https://doi.org/10.1109/tc.2021.3054656","mag":"3121702752"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2021.3054656","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2021.3054656","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029626585","display_name":"Fei Xu","orcid":"https://orcid.org/0000-0003-1590-5323"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fei Xu","raw_affiliation_strings":["Shanghai Key Laboratory of Multidimensional Information Processing, School of Computer Science and Technology, East China Normal University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Multidimensional Information Processing, School of Computer Science and Technology, East China Normal University, Shanghai, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082777501","display_name":"Yiling Qin","orcid":null},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiling Qin","raw_affiliation_strings":["Shanghai Key Laboratory of Multidimensional Information Processing, School of Computer Science and Technology, East China Normal University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Multidimensional Information Processing, School of Computer Science and Technology, East China Normal University, Shanghai, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100379221","display_name":"Li Chen","orcid":"https://orcid.org/0000-0002-2300-6996"},"institutions":[{"id":"https://openalex.org/I79516672","display_name":"University of Louisiana at Lafayette","ror":"https://ror.org/01x8rc503","country_code":"US","type":"education","lineage":["https://openalex.org/I2799628689","https://openalex.org/I79516672"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Li Chen","raw_affiliation_strings":["School of Computing and Informatics, University of Louisiana at Lafayette, Lafayette, LA, USA"],"affiliations":[{"raw_affiliation_string":"School of Computing and Informatics, University of Louisiana at Lafayette, Lafayette, LA, USA","institution_ids":["https://openalex.org/I79516672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100760218","display_name":"Zhi Zhou","orcid":"https://orcid.org/0000-0002-0987-9344"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi Zhou","raw_affiliation_strings":["Guangdong Key Laboratory of Big Data Analysis and Processing, School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Guangdong Key Laboratory of Big Data Analysis and Processing, School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048398271","display_name":"Fangming Liu","orcid":"https://orcid.org/0000-0002-8570-1345"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangming Liu","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5029626585"],"corresponding_institution_ids":["https://openalex.org/I66867065"],"apc_list":null,"apc_paid":null,"fwci":4.2633,"has_fulltext":false,"cited_by_count":56,"citation_normalized_percentile":{"value":0.95431414,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"71","issue":"2","first_page":"450","last_page":"463"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/provisioning","display_name":"Provisioning","score":0.8498848080635071},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7214655876159668},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6060550212860107},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.6031787991523743},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5338979959487915},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.36833781003952026},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3657342791557312},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35923004150390625},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3262961804866791},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32341575622558594},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.30896955728530884},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.18906128406524658}],"concepts":[{"id":"https://openalex.org/C172191483","wikidata":"https://www.wikidata.org/wiki/Q1071806","display_name":"Provisioning","level":2,"score":0.8498848080635071},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7214655876159668},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6060550212860107},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.6031787991523743},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5338979959487915},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.36833781003952026},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3657342791557312},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35923004150390625},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3262961804866791},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32341575622558594},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.30896955728530884},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.18906128406524658}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2021.3054656","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2021.3054656","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.4699999988079071,"display_name":"Decent work and economic growth"}],"awards":[{"id":"https://openalex.org/G1037395693","display_name":null,"funder_award_id":"3004210116","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G4797180708","display_name":null,"funder_award_id":"2017YFB1001703","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G4892158929","display_name":null,"funder_award_id":"61722206","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4922142912","display_name":null,"funder_award_id":"392046569 of NSFC-DFG","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5825799639","display_name":null,"funder_award_id":"2017KFKJXX009","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G7434893301","display_name":null,"funder_award_id":"61520106005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G75260446","display_name":null,"funder_award_id":"61761136014","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7638806211","display_name":null,"funder_award_id":"18DZ2270800","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"},{"id":"https://openalex.org/G8882784736","display_name":null,"funder_award_id":"20511102802","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321885","display_name":"Science and Technology Commission of Shanghai Municipality","ror":"https://ror.org/03kt66j61"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W2018162709","https://openalex.org/W2752512710","https://openalex.org/W2773724532","https://openalex.org/W2798515322","https://openalex.org/W2886050937","https://openalex.org/W2889574497","https://openalex.org/W2889893539","https://openalex.org/W2890025235","https://openalex.org/W2899160556","https://openalex.org/W2918828872","https://openalex.org/W2918996793","https://openalex.org/W2930508541","https://openalex.org/W2956461999","https://openalex.org/W2963149401","https://openalex.org/W2963403751","https://openalex.org/W2963988417","https://openalex.org/W2979479406","https://openalex.org/W2983459053","https://openalex.org/W2987607480","https://openalex.org/W2988514844","https://openalex.org/W3010457348","https://openalex.org/W3010543330","https://openalex.org/W3012028616","https://openalex.org/W3012125688","https://openalex.org/W3012257905","https://openalex.org/W3023815730","https://openalex.org/W3037519745","https://openalex.org/W3045297266","https://openalex.org/W3047528232","https://openalex.org/W3093150360","https://openalex.org/W3095841401","https://openalex.org/W3096533337","https://openalex.org/W3096650550","https://openalex.org/W3097948932","https://openalex.org/W3131944805","https://openalex.org/W4242841269","https://openalex.org/W4250589301","https://openalex.org/W4287865415","https://openalex.org/W4288020540","https://openalex.org/W4288289123","https://openalex.org/W6744307745","https://openalex.org/W6753751555","https://openalex.org/W6755923984","https://openalex.org/W6758283263","https://openalex.org/W6759743693","https://openalex.org/W6761088107","https://openalex.org/W6765484274","https://openalex.org/W6769424276","https://openalex.org/W6770197564","https://openalex.org/W6772936160","https://openalex.org/W6773725002","https://openalex.org/W6773836194","https://openalex.org/W6775201933","https://openalex.org/W6779740189","https://openalex.org/W6780878769"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W4382618745","https://openalex.org/W1973775000","https://openalex.org/W2748922771","https://openalex.org/W1987128138","https://openalex.org/W2743976221"],"abstract_inverted_index":{"Serverless":[0],"computing":[1],"is":[2,83],"becoming":[3],"a":[4,27,40,109,142],"promising":[5],"paradigm":[6],"for":[7,119],"Distributed":[8],"Deep":[9],"Neural":[10],"Network":[11],"(DDNN)":[12],"training":[13,25,63,82,122,146,165,189],"in":[14,65],"the":[15,87,126,132,193,206],"cloud,":[16],"as":[17,161],"it":[18],"allows":[19],"users":[20],"to":[21,60,115,149,162,201],"decompose":[22],"complex":[23],"model":[24,148],"into":[26],"number":[28,46],"of":[29,79,90,128,153,196],"<i>functions</i>":[30],"without":[31],"managing":[32],"virtual":[33],"machines":[34],"or":[35,56],"servers.":[36],"Though":[37],"provided":[38],"with":[39,167,205,212],"simpler":[41],"resource":[42,52,88,112,157,208],"interface":[43],"(i.e.,":[44],"function":[45,51,111,137,197],"and":[47,94,104,136,176,191],"memory":[48],"size),":[49],"inadequate":[50],"provisioning":[53,113,158,209],"(either":[54],"under-provisioning":[55],"over-provisioning)":[57],"easily":[58],"leads":[59],"<i>unpredictable</i>":[61],"DDNN":[62,81,121,145,164,188],"performance":[64,118,147,166,190],"serverless":[66,80,120,168],"platforms.":[67],"Our":[68],"empirical":[69],"studies":[70],"on":[71,173],"AWS":[72,174],"Lambda":[73,175],"indicate":[74],"that,":[75,181],"such":[76],"<i>unpredictable":[77],"performance</i>":[78],"mainly":[84],"caused":[85],"by":[86,199],"bottleneck":[89],"Parameter":[91],"Servers":[92],"(PS)":[93],"small":[95],"local":[96],"batch":[97],"size.":[98],"In":[99],"this":[100],"article,":[101],"we":[102,140],"design":[103,152],"implement":[105],"<i><inline-formula><tex-math":[106,154,182],"notation=\"LaTeX\">$\\lambda$</tex-math><alternatives><mml:math><mml:mi>&#x03BB;</mml:mi></mml:math><inline-graphic":[107,155,183],"xlink:href=\"xu-ieq1-3054656.gif\"/></alternatives></inline-formula>DNN</i>,":[108],"cost-efficient":[110],"framework":[114],"provide":[116],"predictable":[117,187],"workloads,":[123],"while":[124],"saving":[125],"budget":[127],"provisioned":[129],"functions.":[130,169],"Leveraging":[131],"PS":[133],"network":[134],"bandwidth":[135],"CPU":[138],"utilization,":[139],"build":[141],"<i>lightweight</i>":[143],"analytical":[144],"enable":[150],"our":[151],"xlink:href=\"xu-ieq2-3054656.gif\"/></alternatives></inline-formula>DNN</i>":[156],"strategy,":[159],"so":[160],"guarantee":[163],"Extensive":[170],"prototype":[171],"experiments":[172],"complementary":[177],"trace-driven":[178],"simulations":[179],"demonstrate":[180],"xlink:href=\"xu-ieq3-3054656.gif\"/></alternatives></inline-formula>DNN</i>":[184],"can":[185],"deliver":[186],"save":[192],"monetary":[194],"cost":[195],"resources":[198],"up":[200],"66.7":[202],"percent,":[203],"compared":[204],"state-of-the-art":[207],"strategies,":[210],"yet":[211],"an":[213],"acceptable":[214],"runtime":[215],"overhead.":[216]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":17},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-28T08:17:26.163206","created_date":"2025-10-10T00:00:00"}
