{"id":"https://openalex.org/W4283315879","doi":"https://doi.org/10.1145/3544782","title":"Scheduling Hyperparameters to Improve Generalization: From Centralized SGD to Asynchronous SGD","display_name":"Scheduling Hyperparameters to Improve Generalization: From Centralized SGD to Asynchronous SGD","publication_year":2022,"publication_date":"2022-06-22","ids":{"openalex":"https://openalex.org/W4283315879","doi":"https://doi.org/10.1145/3544782"},"language":"en","primary_location":{"id":"doi:10.1145/3544782","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3544782","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3544782","source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3544782","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021218766","display_name":"Jianhui Sun","orcid":"https://orcid.org/0000-0003-0032-3646"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jianhui Sun","raw_affiliation_strings":["University of Virginia"],"affiliations":[{"raw_affiliation_string":"University of Virginia","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074173740","display_name":"Ying Yang","orcid":"https://orcid.org/0000-0003-1602-1212"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ying Yang","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048238468","display_name":"Guangxu Xun","orcid":"https://orcid.org/0000-0002-7657-4305"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangxu Xun","raw_affiliation_strings":["Baidu Research"],"affiliations":[{"raw_affiliation_string":"Baidu Research","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013588572","display_name":"Aidong Zhang","orcid":"https://orcid.org/0000-0001-9723-3246"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aidong Zhang","raw_affiliation_strings":["University of Virginia"],"affiliations":[{"raw_affiliation_string":"University of Virginia","institution_ids":["https://openalex.org/I51556381"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5021218766"],"corresponding_institution_ids":["https://openalex.org/I51556381"],"apc_list":null,"apc_paid":null,"fwci":1.1104,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.81248037,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.8115942478179932},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7494226098060608},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.7206194400787354},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.5518032312393188},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.42958641052246094},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4210802912712097},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.4209998548030853},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3814672827720642},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3616753816604614},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35258984565734863},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.21368947625160217},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20884627103805542}],"concepts":[{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.8115942478179932},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7494226098060608},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.7206194400787354},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.5518032312393188},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.42958641052246094},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4210802912712097},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.4209998548030853},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3814672827720642},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3616753816604614},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35258984565734863},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.21368947625160217},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20884627103805542},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3544782","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3544782","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3544782","source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3544782","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3544782","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3544782","source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/8"}],"awards":[{"id":"https://openalex.org/G2167990707","display_name":null,"funder_award_id":"1955151","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3770367615","display_name":null,"funder_award_id":"1934600","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6014851183","display_name":null,"funder_award_id":"2008208","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7826493304","display_name":null,"funder_award_id":"IIS-2106913","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8562946007","display_name":null,"funder_award_id":"1938167","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4283315879.pdf","grobid_xml":"https://content.openalex.org/works/W4283315879.grobid-xml"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W1806891645","https://openalex.org/W1988720110","https://openalex.org/W2014384147","https://openalex.org/W2062714872","https://openalex.org/W2097998348","https://openalex.org/W2108598243","https://openalex.org/W2605488176","https://openalex.org/W2726042758","https://openalex.org/W2752782242","https://openalex.org/W2808402975","https://openalex.org/W2883109957","https://openalex.org/W2911445777","https://openalex.org/W2912083425","https://openalex.org/W2912500072","https://openalex.org/W2920882081","https://openalex.org/W2951538941","https://openalex.org/W2962915600","https://openalex.org/W2963243933","https://openalex.org/W2963792515","https://openalex.org/W2964054038","https://openalex.org/W2964102336","https://openalex.org/W2990719156","https://openalex.org/W3008463948","https://openalex.org/W3014686072","https://openalex.org/W3080580959","https://openalex.org/W3080994088","https://openalex.org/W3091864426","https://openalex.org/W3139294691","https://openalex.org/W3167622379","https://openalex.org/W4255556797","https://openalex.org/W4291127132","https://openalex.org/W6674385629"],"related_works":["https://openalex.org/W4288259399","https://openalex.org/W2969529314","https://openalex.org/W4312206232","https://openalex.org/W2975301014","https://openalex.org/W2895097035","https://openalex.org/W4206903459","https://openalex.org/W2754816816","https://openalex.org/W4283028824","https://openalex.org/W4366280654","https://openalex.org/W3160167280"],"abstract_inverted_index":{"This":[0],"paper":[1],"1":[2],"studies":[3],"how":[4,90,244],"to":[5,8,67,91,103,182,195,252],"schedule":[6,93],"hyperparameters":[7,66,95,150],"improve":[9,104],"generalization":[10,157,235,278],"of":[11,96,100,122,167,221],"both":[12,48,281],"centralized":[13,49,85],"single-machine":[14,86],"stochastic":[15,226],"gradient":[16,37,216],"descent":[17],"(SGD)":[18],"and":[19,34,50,88,154,199,230,289],"distributed":[20,51,183],"asynchronous":[21,184,222],"SGD":[22,24,79,87,175,185],"(ASGD).":[23],"augmented":[25],"with":[26],"momentum":[27,32,56,101,114,123,152],"variants":[28,102,124],"(e.g.,":[29,151],"heavy":[30],"ball":[31],"(SHB)":[33],"Nesterov\u2019s":[35],"accelerated":[36],"(NAG))":[38],"has":[39],"been":[40],"the":[41,73,94,165,210,219,286],"default":[42],"optimizer":[43],"for":[44,76,170,237],"many":[45,54],"tasks,":[46],"in":[47,187,280,293],"environments.":[52],"However,":[53],"advanced":[55],"variants,":[57],"despite":[58],"empirical":[59],"advantage":[60],"over":[61],"classical":[62],"SHB/NAG,":[63],"introduce":[64],"extra":[65],"tune.":[68],"The":[69],"error-prone":[70],"tuning":[71,160,256,272,287],"is":[72],"main":[74],"barrier":[75],"AutoML.":[77],"Centralized":[78],":":[80,176],"We":[81,106,163,177,208],"first":[82],"focus":[83,135],"on":[84,136],"show":[89,164,243],"efficiently":[92],"a":[97,108,119,189,225,233,240,245,263],"large":[98,120,247],"class":[99],"generalization.":[105],"propose":[107],"unified":[109],"framework":[110],"called":[111],"multistage":[112,145,168],"quasi-hyperbolic":[113],"(Multistage":[115],"QHM),":[116],"which":[117,188],"covers":[118],"family":[121],"as":[125,266],"its":[126],"special":[127],"cases":[128],"(e.g.":[129],"vanilla":[130],"SGD/SHB/NAG).":[131],"Existing":[132],"works":[133],"mainly":[134],"only":[137,159],"scheduling":[138],"learning":[139,248],"rate":[140,249],"\u03b1":[141,161],"\u2019s":[142],"decay,":[143],"while":[144],"QHM":[146,169],"allows":[147],"additional":[148],"varying":[149],"factor),":[153],"demonstrates":[155],"better":[156],"than":[158,262],".":[162],"convergence":[166],"general":[171],"nonconvex":[172],"objectives.":[173],"Distributed":[174],"then":[178,231],"extend":[179],"our":[180,271,276],"theory":[181],"(ASGD),":[186],"parameter":[190],"server":[191],"distributes":[192],"data":[193],"batches":[194],"several":[196],"worker":[197],"machines":[198],"updates":[200],"parameters":[201],"via":[202,224],"aggregating":[203],"batch":[204],"gradients":[205],"from":[206],"workers.":[207],"quantify":[209],"asynchrony":[211],"between":[212],"different":[213],"workers":[214],"(i.e.,":[215],"staleness),":[217],"model":[218],"dynamics":[220],"iterations":[223],"differential":[227],"equation":[228],"(SDE),":[229],"derive":[232],"PAC-Bayesian":[234],"bound":[236],"ASGD.":[238],"As":[239],"byproduct,":[241],"we":[242,267],"moderately":[246],"helps":[250],"ASGD":[251],"generalize":[253],"better.":[254],"Our":[255,283,297],"strategies":[257,273,284],"have":[258],"rigorous":[259],"justifications":[260],"rather":[261],"blind":[264],"trial-and-error":[265],"theoretically":[268],"prove":[269],"why":[270],"could":[274],"decrease":[275],"derived":[277],"errors":[279],"cases.":[282],"simplify":[285],"process":[288],"beat":[290],"competitive":[291],"optimizers":[292],"test":[294],"accuracy":[295],"empirically.":[296],"codes":[298],"are":[299],"publicly":[300],"available":[301],"https://github.com/jsycsjh/centralized-asynchronous-tuning.":[302]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
