{"id":"https://openalex.org/W4401863528","doi":"https://doi.org/10.1145/3637528.3671718","title":"Provable Adaptivity of Adam under Non-uniform Smoothness","display_name":"Provable Adaptivity of Adam under Non-uniform Smoothness","publication_year":2024,"publication_date":"2024-08-24","ids":{"openalex":"https://openalex.org/W4401863528","doi":"https://doi.org/10.1145/3637528.3671718"},"language":"en","primary_location":{"id":"doi:10.1145/3637528.3671718","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671718","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671718","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671718","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055118140","display_name":"B. Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bohan Wang","raw_affiliation_strings":["University of Science and Technology of China &amp; Microsoft Research Asia, Beijing, Haidian, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China &amp; Microsoft Research Asia, Beijing, Haidian, China","institution_ids":["https://openalex.org/I4210113369","https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111823049","display_name":"Y. Q. Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yushun Zhang","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066928490","display_name":"Huishuai Zhang","orcid":"https://orcid.org/0000-0003-2711-7295"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huishuai Zhang","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044802273","display_name":"Qi Meng","orcid":"https://orcid.org/0000-0002-3103-1999"},"institutions":[{"id":"https://openalex.org/I4210120485","display_name":"Academy of Mathematics and Systems Science","ror":"https://ror.org/02jkmyk67","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210120485"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Meng","raw_affiliation_strings":["Chinese Academy of Mathematics and Systems Science, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Mathematics and Systems Science, Beijing, China","institution_ids":["https://openalex.org/I4210120485"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028963874","display_name":"Ruoyu Sun","orcid":"https://orcid.org/0000-0003-2487-5322"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruoyu Sun","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101199494","display_name":"Zhi-Ming Ma","orcid":"https://orcid.org/0000-0003-1169-7627"},"institutions":[{"id":"https://openalex.org/I4210120485","display_name":"Academy of Mathematics and Systems Science","ror":"https://ror.org/02jkmyk67","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210120485"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi-Ming Ma","raw_affiliation_strings":["Chinese Academy of Mathematics and Systems Science, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Mathematics and Systems Science, Beijing, China","institution_ids":["https://openalex.org/I4210120485"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101884287","display_name":"Tie\u2010Yan Liu","orcid":"https://orcid.org/0000-0002-0476-8020"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tie-Yan Liu","raw_affiliation_strings":["Microsoft, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101766226","display_name":"Zhi\u2010Quan Luo","orcid":"https://orcid.org/0000-0003-3995-914X"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi-Quan Luo","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100690341","display_name":"Wei Chen","orcid":"https://orcid.org/0000-0002-7438-5180"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Chen","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5055118140"],"corresponding_institution_ids":["https://openalex.org/I126520041","https://openalex.org/I4210113369"],"apc_list":null,"apc_paid":null,"fwci":1.3901,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.84286033,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2960","last_page":"2969"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13234","display_name":"advanced mathematical theories","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/2610","display_name":"Mathematical Physics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/smoothness","display_name":"Smoothness","score":0.705229640007019},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6775346398353577},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3303125500679016},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24753153324127197},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.11592793464660645}],"concepts":[{"id":"https://openalex.org/C102634674","wikidata":"https://www.wikidata.org/wiki/Q868473","display_name":"Smoothness","level":2,"score":0.705229640007019},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6775346398353577},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3303125500679016},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24753153324127197},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.11592793464660645}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3637528.3671718","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671718","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671718","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3637528.3671718","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671718","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671718","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4401863528.pdf"},"referenced_works_count":9,"referenced_works":["https://openalex.org/W2029463628","https://openalex.org/W2963470657","https://openalex.org/W2963698657","https://openalex.org/W2979298859","https://openalex.org/W3091804006","https://openalex.org/W3172923250","https://openalex.org/W3209109662","https://openalex.org/W3214463151","https://openalex.org/W4240768087"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2393022482","https://openalex.org/W2377346130","https://openalex.org/W2073681303","https://openalex.org/W2361092061","https://openalex.org/W2319775965","https://openalex.org/W2357314690","https://openalex.org/W2390279801"],"abstract_inverted_index":{"Adam":[0,25,77,93,106,120,130,155,191,205],"is":[1,16,101,138,186,206],"widely":[2],"adopted":[3,107],"in":[4,108,197],"practical":[5],"applications":[6],"due":[7],"to":[8,33,70,146,156],"its":[9,13,66,133],"fast":[10],"convergence.":[11],"However,":[12],"theoretical":[14],"analysis":[15,117],"still":[17],"far":[18],"from":[19],"satisfactory.":[20],"Existing":[21],"convergence":[22,89,116,134],"analyses":[23],"for":[24,44],"rely":[26],"on":[27],"the":[28,35,56,62,88,102,114,122,142,148,167,187,198,202],"bounded":[29,83,123,140],"smoothness":[30,124,137],"assumption,":[31],"referred":[32,145],"as":[34,61,147],"L-smooth":[36],"condition.":[37,151],"Unfortunately,":[38],"this":[39,53,185],"assumption":[40,54],"does":[41],"not":[42],"hold":[43],"many":[45],"deep":[46,109],"learning":[47,98,110,163],"tasks.":[48,111],"Moreover,":[49],"we":[50],"believe":[51],"that":[52,128,175,190],"obscures":[55],"true":[57],"benefit":[58],"of":[59,76,90,105,118,171,204],"Adam,":[60],"algorithm":[63],"can":[64,131,177],"adapt":[65],"update":[67],"magnitude":[68],"according":[69],"local":[71],"smoothness.":[72,84],"This":[73,85],"important":[74],"feature":[75],"becomes":[78],"irrelevant":[79],"when":[80,136,158],"assuming":[81],"globally":[82],"paper":[86],"studies":[87],"randomly":[91],"reshuffled":[92],"(RR":[94],"Adam)":[95],"with":[96],"diminishing":[97,162],"rate,":[99],"which":[100],"major":[103],"version":[104],"We":[112,126,152,165],"present":[113],"first":[115,188],"RR":[119,129],"without":[121],"assumption.":[125],"demonstrate":[127],"maintain":[132],"properties":[135],"linearly":[139],"by":[141],"gradient":[143],"norm,":[144],"(L0,":[149],"L1)-smooth":[150],"further":[153],"compare":[154],"SGD":[157,172,176,193],"both":[159],"methods":[160],"use":[161],"rate.":[164],"refine":[166],"existing":[168],"lower":[169],"bound":[170],"and":[173,192,201],"show":[174],"be":[178],"slower":[179],"than":[180],"Adam.":[181],"To":[182],"our":[183],"knowledge,":[184],"time":[189],"are":[194],"rigorously":[195],"compared":[196],"same":[199],"setting":[200],"advantage":[203],"revealed.":[207]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
