{"id":"https://openalex.org/W7134056761","doi":"https://doi.org/10.48550/arxiv.2603.05002","title":"Non-Euclidean Gradient Descent Operates at the Edge of Stability","display_name":"Non-Euclidean Gradient Descent Operates at the Edge of Stability","publication_year":2026,"publication_date":"2026-03-05","ids":{"openalex":"https://openalex.org/W7134056761","doi":"https://doi.org/10.48550/arxiv.2603.05002"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.05002","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128237974","display_name":"Rustem Islamov","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Islamov, Rustem","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086974356","display_name":"Michael Crawshaw","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Crawshaw, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128256528","display_name":"Jeremy Cohen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cohen, Jeremy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128238323","display_name":"Robert Gower","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gower, Robert","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5128237974"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.8752999901771545,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.8752999901771545,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.044199999421834946,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.007400000002235174,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/smoothness","display_name":"Smoothness","score":0.7649999856948853},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.7179999947547913},{"id":"https://openalex.org/keywords/hessian-matrix","display_name":"Hessian matrix","score":0.7102000117301941},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.6776000261306763},{"id":"https://openalex.org/keywords/interpretation","display_name":"Interpretation (philosophy)","score":0.6377999782562256},{"id":"https://openalex.org/keywords/sharpening","display_name":"Sharpening","score":0.6215000152587891},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5795000195503235}],"concepts":[{"id":"https://openalex.org/C102634674","wikidata":"https://www.wikidata.org/wiki/Q868473","display_name":"Smoothness","level":2,"score":0.7649999856948853},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.7179999947547913},{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.7102000117301941},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.6776000261306763},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.6377999782562256},{"id":"https://openalex.org/C2781137444","wikidata":"https://www.wikidata.org/wiki/Q237105","display_name":"Sharpening","level":2,"score":0.6215000152587891},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5795000195503235},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.5685999989509583},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5591999888420105},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.45190000534057617},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.3646000027656555},{"id":"https://openalex.org/C2776637919","wikidata":"https://www.wikidata.org/wiki/Q624380","display_name":"Descent (aeronautics)","level":2,"score":0.34689998626708984},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.295199990272522},{"id":"https://openalex.org/C83415579","wikidata":"https://www.wikidata.org/wiki/Q161973","display_name":"Arc (geometry)","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.2838999927043915},{"id":"https://openalex.org/C15627037","wikidata":"https://www.wikidata.org/wiki/Q541961","display_name":"Classification of discontinuities","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C167879884","wikidata":"https://www.wikidata.org/wiki/Q727568","display_name":"Balanced flow","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2775000035762787},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.05002","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.05002","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.05002","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.05002","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"Edge":[1],"of":[2,13,57,62,167],"Stability":[3],"(EoS)":[4],"is":[5],"a":[6,155,164],"phenomenon":[7],"where":[8],"the":[9,14,21,60,148],"sharpness":[10,82,89,137],"(largest":[11],"eigenvalue)":[12],"Hessian":[15],"approaches":[16],"and":[17,96,120],"then":[18],"hovers":[19],"near":[20],"stability":[22],"threshold":[23,149],"$2/\u03b7$":[24],"during":[25],"gradient":[26,169],"descent":[27],"(GD)":[28],"with":[29,134],"step":[30],"size":[31],"$\u03b7$.":[32],"Despite":[33],"(apparently)":[34],"violating":[35],"classical":[36],"smoothness":[37],"assumptions,":[38],"EoS":[39,58,108],"has":[40,109],"been":[41,111],"widely":[42],"observed":[43],"in":[44],"deep":[45],"learning,":[46],"but":[47],"its":[48],"theoretical":[49],"foundations":[50],"remain":[51],"incomplete.":[52],"We":[53],"provide":[54],"an":[55,84],"interpretation":[56,70],"through":[59],"lens":[61],"Directional":[63],"Smoothness":[64],"[Mishkin":[65],"et":[66],"al.,":[67],"2024].":[68],"This":[69],"naturally":[71],"extends":[72],"to":[73,79],"non-Euclidean":[74,132,168],"norms,":[75],"which":[76,107],"we":[77,129],"use":[78],"define":[80],"generalized":[81,88,136],"under":[83],"arbitrary":[85],"norm.":[86],"Our":[87],"measure":[90],"includes":[91],"previously":[92],"studied":[93],"vanilla":[94],"GD":[95,98,133],"preconditioned":[97],"as":[99,102,104,114],"special":[100],"cases,":[101],"well":[103],"methods":[105],"for":[106],"not":[110],"studied,":[112],"such":[113],"$\\ell_{\\infty}$-descent,":[115],"Block":[116],"CD,":[117],"Spectral":[118],"GD,":[119],"their":[121],"normalized":[122],"versions.":[123],"Through":[124],"experiments":[125],"on":[126],"neural":[127],"networks,":[128],"show":[130],"that":[131,159],"our":[135,152],"also":[138],"exhibits":[139],"progressive":[140],"sharpening":[141],"followed":[142],"by":[143],"oscillations":[144],"around":[145],"or":[146],"above":[147],"$2/\u03b7$.":[150],"Practically,":[151],"framework":[153],"provides":[154],"geometry-aware":[156],"spectral":[157],"diagnostic":[158],"can":[160],"be":[161],"applied":[162],"across":[163],"broad":[165],"class":[166],"methods.":[170]},"counts_by_year":[],"updated_date":"2026-05-30T06:14:24.967023","created_date":"2026-03-07T00:00:00"}
