{"id":"https://openalex.org/W7139966452","doi":"https://doi.org/10.48550/arxiv.2603.18899","title":"Uniform a priori bounds and error analysis for the Adam stochastic gradient descent optimization method","display_name":"Uniform a priori bounds and error analysis for the Adam stochastic gradient descent optimization method","publication_year":2026,"publication_date":"2026-03-19","ids":{"openalex":"https://openalex.org/W7139966452","doi":"https://doi.org/10.48550/arxiv.2603.18899"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.18899","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18899","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.18899","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086965787","display_name":"Steffen Dereich","orcid":"https://orcid.org/0000-0001-9591-9340"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dereich, Steffen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130221654","display_name":"Thang Do","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Do, Thang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5074382323","display_name":"Arnulf Jentzen","orcid":"https://orcid.org/0000-0002-9840-3339"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jentzen, Arnulf","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5086965787"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9628999829292297,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9628999829292297,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.00419999985024333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.0032999999821186066,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.7342000007629395},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.6208999752998352},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.554099977016449},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5338000059127808},{"id":"https://openalex.org/keywords/convex-optimization","display_name":"Convex optimization","score":0.46860000491142273},{"id":"https://openalex.org/keywords/gradient-method","display_name":"Gradient method","score":0.43799999356269836},{"id":"https://openalex.org/keywords/stochastic-optimization","display_name":"Stochastic optimization","score":0.42669999599456787},{"id":"https://openalex.org/keywords/stochastic-approximation","display_name":"Stochastic approximation","score":0.4174000024795532},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.41100001335144043}],"concepts":[{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.7342000007629395},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.6208999752998352},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5568000078201294},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.554099977016449},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5338000059127808},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5174000263214111},{"id":"https://openalex.org/C157972887","wikidata":"https://www.wikidata.org/wiki/Q463359","display_name":"Convex optimization","level":3,"score":0.46860000491142273},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.43799999356269836},{"id":"https://openalex.org/C194387892","wikidata":"https://www.wikidata.org/wiki/Q1747770","display_name":"Stochastic optimization","level":2,"score":0.42669999599456787},{"id":"https://openalex.org/C55479107","wikidata":"https://www.wikidata.org/wiki/Q97663916","display_name":"Stochastic approximation","level":3,"score":0.4174000024795532},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.41100001335144043},{"id":"https://openalex.org/C112680207","wikidata":"https://www.wikidata.org/wiki/Q714886","display_name":"Regular polygon","level":2,"score":0.4016000032424927},{"id":"https://openalex.org/C145446738","wikidata":"https://www.wikidata.org/wiki/Q319913","display_name":"Convex function","level":3,"score":0.3831000030040741},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3741999864578247},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.36570000648498535},{"id":"https://openalex.org/C3018824978","wikidata":"https://www.wikidata.org/wiki/Q2894891","display_name":"Error analysis","level":2,"score":0.36559998989105225},{"id":"https://openalex.org/C12108790","wikidata":"https://www.wikidata.org/wiki/Q2234833","display_name":"Convex analysis","level":4,"score":0.32710000872612},{"id":"https://openalex.org/C179254644","wikidata":"https://www.wikidata.org/wiki/Q13222844","display_name":"Moment (physics)","level":2,"score":0.3181999921798706},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30820000171661377},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C7321624","wikidata":"https://www.wikidata.org/wiki/Q205","display_name":"Infinity","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C122268817","wikidata":"https://www.wikidata.org/wiki/Q2020318","display_name":"Frank\u2013Wolfe algorithm","level":5,"score":0.29409998655319214},{"id":"https://openalex.org/C55660270","wikidata":"https://www.wikidata.org/wiki/Q5164377","display_name":"Constrained optimization","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2831999957561493},{"id":"https://openalex.org/C122383733","wikidata":"https://www.wikidata.org/wiki/Q865920","display_name":"Approximation error","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C2776637919","wikidata":"https://www.wikidata.org/wiki/Q624380","display_name":"Descent (aeronautics)","level":2,"score":0.2606000006198883},{"id":"https://openalex.org/C10494615","wikidata":"https://www.wikidata.org/wiki/Q17086765","display_name":"Proximal Gradient Methods","level":4,"score":0.25940001010894775}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.18899","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18899","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.18899","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18899","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"adaptive":[1],"moment":[2],"estimation":[3],"(Adam)":[4],"optimizer":[5],"proposed":[6],"by":[7],"Kingma":[8],"&amp;":[9],"Ba":[10],"(2014)":[11],"is":[12,109],"presumably":[13],"the":[14,24,41,86,95,110,130],"most":[15],"popular":[16],"stochastic":[17,73],"gradient":[18],"descent":[19],"(SGD)":[20],"optimization":[21,74],"method":[22],"for":[23,63,81,122,129,138,140],"training":[25,42],"of":[26,43,59,113,144],"deep":[27],"neural":[28],"networks":[29],"(DNNs)":[30],"in":[31,40,85],"artificial":[32],"intelligence":[33],"(AI)":[34],"systems.":[35],"Despite":[36],"its":[37],"groundbreaking":[38],"success":[39],"AI":[44],"systems,":[45],"it":[46],"still":[47],"remains":[48,105],"an":[49,134],"open":[50],"research":[51],"problem":[52],"to":[53,70,102,116,126],"provide":[54,88,127],"a":[55,119,141],"complete":[56],"error":[57,78,136],"analysis":[58,79,137],"Adam,":[60],"not":[61,100],"only":[62],"optimizing":[64],"DNNs":[65],"but":[66,104],"even":[67],"when":[68],"applied":[69],"strongly":[71,82,145],"convex":[72,83,146],"problems":[75],"(SOPs).":[76],"Previous":[77],"results":[80],"SOPs":[84],"literature":[87],"conditional":[89],"convergence":[90],"analyses":[91],"that":[92,97],"rely":[93],"on":[94],"assumption":[96],"Adam":[98,123,139],"does":[99],"diverge":[101],"infinity":[103],"uniformly":[106],"bounded.":[107],"It":[108],"key":[111],"contribution":[112],"this":[114],"work":[115],"establish":[117],"uniform":[118],"priori":[120],"bounds":[121],"and,":[124],"thereby,":[125],"--":[128,133],"first":[131],"time":[132],"unconditional":[135],"large":[142],"class":[143],"SOPs.":[147]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-21T00:00:00"}
