{"id":"https://openalex.org/W3215746499","doi":"https://doi.org/10.1162/neco_a_01626","title":"The Limiting Dynamics of SGD: Modified Loss, Phase-Space Oscillations, and Anomalous Diffusion","display_name":"The Limiting Dynamics of SGD: Modified Loss, Phase-Space Oscillations, and Anomalous Diffusion","publication_year":2023,"publication_date":"2023-12-05","ids":{"openalex":"https://openalex.org/W3215746499","doi":"https://doi.org/10.1162/neco_a_01626","mag":"3215746499","pmid":"https://pubmed.ncbi.nlm.nih.gov/38052080"},"language":"en","primary_location":{"id":"doi:10.1162/neco_a_01626","is_oa":false,"landing_page_url":"https://doi.org/10.1162/neco_a_01626","pdf_url":null,"source":{"id":"https://openalex.org/S207023548","display_name":"Neural Computation","issn_l":"0899-7667","issn":["0899-7667","1530-888X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computation","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2107.09133","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037955577","display_name":"Daniel Kunin","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Daniel Kunin","raw_affiliation_strings":["Stanford University, Stanford, CA 94305, U.S.A. kunin@stanford.edu"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA 94305, U.S.A. kunin@stanford.edu","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052251046","display_name":"Javier Sagastuy-Bre\u00f1a","orcid":"https://orcid.org/0000-0003-3057-194X"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Javier Sagastuy-Brena","raw_affiliation_strings":["Stanford University, Stanford, CA 94305, U.S.A. jvrsgsty@stanford.edu"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA 94305, U.S.A. jvrsgsty@stanford.edu","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042379226","display_name":"Lauren Gillespie","orcid":"https://orcid.org/0000-0003-2496-8035"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Lauren Gillespie","raw_affiliation_strings":["Stanford University, Stanford, CA 94305, U.S.A. gillespl@stanford.edu"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA 94305, U.S.A. gillespl@stanford.edu","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026259311","display_name":"Eshed Margalit","orcid":"https://orcid.org/0000-0003-0841-7444"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Eshed Margalit","raw_affiliation_strings":["Stanford University, Stanford, CA 94305, U.S.A. eshedm@stanford.edu"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA 94305, U.S.A. eshedm@stanford.edu","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101548634","display_name":"Hidenori Tanaka","orcid":"https://orcid.org/0000-0003-2778-1962"},"institutions":[{"id":"https://openalex.org/I4210092597","display_name":"NTT (United States)","ror":"https://ror.org/004cn7092","country_code":"US","type":"company","lineage":["https://openalex.org/I2251713219","https://openalex.org/I4210092597"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hidenori Tanaka","raw_affiliation_strings":["NTT Research, Sunnyvale, CA 94085, U.S.A. hidenori.tanaka@ntt-research.com"],"affiliations":[{"raw_affiliation_string":"NTT Research, Sunnyvale, CA 94085, U.S.A. hidenori.tanaka@ntt-research.com","institution_ids":["https://openalex.org/I4210092597"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056551357","display_name":"Surya Ganguli","orcid":"https://orcid.org/0000-0002-9264-7551"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]},{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Surya Ganguli","raw_affiliation_strings":["Facebook AI Research, Menlo Park, CA 94025, U.S.A. sganguli@stanford.edu","Stanford University, Stanford, CA 94305, U.S.A"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research, Menlo Park, CA 94025, U.S.A. sganguli@stanford.edu","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Stanford University, Stanford, CA 94305, U.S.A","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011316863","display_name":"Daniel Yamins","orcid":"https://orcid.org/0000-0001-6155-4523"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Daniel L. K. Yamins","raw_affiliation_strings":["Stanford University, Stanford, CA 94305, U.S.A. yamins@stanford.edu"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA 94305, U.S.A. yamins@stanford.edu","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5011316863","https://openalex.org/A5026259311","https://openalex.org/A5037955577","https://openalex.org/A5042379226","https://openalex.org/A5052251046","https://openalex.org/A5056551357","https://openalex.org/A5101548634"],"corresponding_institution_ids":["https://openalex.org/I4210092597","https://openalex.org/I4210114444","https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":0.6713,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.6360431,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"36","issue":"1","first_page":"151","last_page":"174"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11520","display_name":"Advanced Thermodynamics and Statistical Mechanics","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11520","display_name":"Advanced Thermodynamics and Statistical Mechanics","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11187","display_name":"Nonlinear Dynamics and Pattern Formation","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10591","display_name":"Theoretical and Computational Physics","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/3104","display_name":"Condensed Matter Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.7204368114471436},{"id":"https://openalex.org/keywords/phase-space","display_name":"Phase space","score":0.6305093765258789},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.5971828699111938},{"id":"https://openalex.org/keywords/statistical-physics","display_name":"Statistical physics","score":0.5644853711128235},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.5522258281707764},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.4771142303943634},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4704999625682831},{"id":"https://openalex.org/keywords/phase","display_name":"Phase (matter)","score":0.46955209970474243},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3402408957481384},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.32645362615585327},{"id":"https://openalex.org/keywords/quantum-mechanics","display_name":"Quantum mechanics","score":0.1383092999458313},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.0730639100074768}],"concepts":[{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.7204368114471436},{"id":"https://openalex.org/C151342819","wikidata":"https://www.wikidata.org/wiki/Q62542","display_name":"Phase space","level":2,"score":0.6305093765258789},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.5971828699111938},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.5644853711128235},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.5522258281707764},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.4771142303943634},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4704999625682831},{"id":"https://openalex.org/C44280652","wikidata":"https://www.wikidata.org/wiki/Q104837","display_name":"Phase (matter)","level":2,"score":0.46955209970474243},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3402408957481384},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32645362615585327},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.1383092999458313},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0730639100074768},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1162/neco_a_01626","is_oa":false,"landing_page_url":"https://doi.org/10.1162/neco_a_01626","pdf_url":null,"source":{"id":"https://openalex.org/S207023548","display_name":"Neural Computation","issn_l":"0899-7667","issn":["0899-7667","1530-888X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computation","raw_type":"journal-article"},{"id":"pmid:38052080","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38052080","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural computation","raw_type":null},{"id":"pmh:oai:arXiv.org:2107.09133","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2107.09133","pdf_url":"https://arxiv.org/pdf/2107.09133","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2107.09133","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2107.09133","pdf_url":"https://arxiv.org/pdf/2107.09133","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.75,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":89,"referenced_works":["https://openalex.org/W1567512734","https://openalex.org/W1598866093","https://openalex.org/W1980287119","https://openalex.org/W2071048859","https://openalex.org/W2162657038","https://openalex.org/W2194775991","https://openalex.org/W2328151902","https://openalex.org/W2552194003","https://openalex.org/W2612690371","https://openalex.org/W2622263826","https://openalex.org/W2626325961","https://openalex.org/W2752366553","https://openalex.org/W2766164908","https://openalex.org/W2766678531","https://openalex.org/W2768267830","https://openalex.org/W2796744096","https://openalex.org/W2809090039","https://openalex.org/W2893110193","https://openalex.org/W2899771611","https://openalex.org/W2903327037","https://openalex.org/W2904243021","https://openalex.org/W2905181074","https://openalex.org/W2910207440","https://openalex.org/W2911495555","https://openalex.org/W2912322140","https://openalex.org/W2912713668","https://openalex.org/W2947654433","https://openalex.org/W2951958592","https://openalex.org/W2962804662","https://openalex.org/W2962819303","https://openalex.org/W2963000508","https://openalex.org/W2963095610","https://openalex.org/W2963173418","https://openalex.org/W2963177640","https://openalex.org/W2963201159","https://openalex.org/W2963384892","https://openalex.org/W2963655672","https://openalex.org/W2963959597","https://openalex.org/W2964052793","https://openalex.org/W2964072432","https://openalex.org/W2964286272","https://openalex.org/W2970217468","https://openalex.org/W2970550417","https://openalex.org/W2971130081","https://openalex.org/W3035466410","https://openalex.org/W3035548130","https://openalex.org/W3037508544","https://openalex.org/W3037624241","https://openalex.org/W3087768334","https://openalex.org/W3102766317","https://openalex.org/W3104527631","https://openalex.org/W3111160927","https://openalex.org/W3119368353","https://openalex.org/W3120901154","https://openalex.org/W3121698465","https://openalex.org/W3123802267","https://openalex.org/W3125761069","https://openalex.org/W3130298903","https://openalex.org/W3130996241","https://openalex.org/W3132026801","https://openalex.org/W3169235690","https://openalex.org/W3169803339","https://openalex.org/W4212774754","https://openalex.org/W4234552385","https://openalex.org/W4256389389","https://openalex.org/W4286403637","https://openalex.org/W4287322898","https://openalex.org/W4287751218","https://openalex.org/W4287989672","https://openalex.org/W4289286706","https://openalex.org/W4293765466","https://openalex.org/W4295151193","https://openalex.org/W4297813530","https://openalex.org/W6634467100","https://openalex.org/W6687483927","https://openalex.org/W6692572551","https://openalex.org/W6738534199","https://openalex.org/W6739826692","https://openalex.org/W6748173244","https://openalex.org/W6756040250","https://openalex.org/W6756548207","https://openalex.org/W6758153731","https://openalex.org/W6758207765","https://openalex.org/W6765175657","https://openalex.org/W6765920069","https://openalex.org/W6775078174","https://openalex.org/W6784102847","https://openalex.org/W6791088654","https://openalex.org/W7048915699"],"related_works":["https://openalex.org/W4243145179","https://openalex.org/W4255875982","https://openalex.org/W4244853958","https://openalex.org/W2029404707","https://openalex.org/W4285325679","https://openalex.org/W4247719608","https://openalex.org/W4237439661","https://openalex.org/W1928239295","https://openalex.org/W4242981732","https://openalex.org/W4232927149"],"abstract_inverted_index":{"In":[0],"this":[1,85,90,114,192],"work,":[2],"we":[3,92,123,148,210],"explore":[4],"the":[5,49,64,68,71,75,79,117,130,134,145,151,159,165,174,195,205,216,228],"limiting":[6,218,229],"dynamics":[7,132,156,196,219,230],"of":[8,36,51,66,81,119,133,167,191,197,207,220,231],"deep":[9,221],"neural":[10,222],"networks":[11,26,223],"trained":[12,201,224],"with":[13,54,100,225],"stochastic":[14],"gradient":[15,52,72],"descent":[16],"(SGD).":[17],"As":[18],"observed":[19],"previously,":[20],"long":[21],"after":[22],"performance":[23],"has":[24],"converged,":[25],"continue":[27],"to":[28,142],"move":[29],"through":[30],"parameter":[31],"space":[32],"by":[33],"a":[34,45,55,95,168,198,212,250],"process":[35],"anomalous":[37,86,217],"diffusion":[38],"in":[39,48,70,116,182,194],"which":[40,171],"distance":[41],"traveled":[42],"grows":[43],"as":[44,107,249],"power":[46],"law":[47],"number":[50],"updates":[53],"nontrivial":[56],"exponent.":[57],"We":[58,112,185],"reveal":[59],"an":[60,108],"intricate":[61],"interaction":[62],"among":[63],"hyperparameters":[65,239],"optimization,":[67],"structure":[69],"noise,":[73],"and":[74,104,136,176,188,233,245],"Hessian":[76],"matrix":[77],"at":[78],"end":[80],"training":[82,161],"that":[83,150,179,255],"explains":[84],"diffusion.":[87],"To":[88],"build":[89],"understanding,":[91],"first":[93],"derive":[94,125],"continuous-time":[96],"model":[97,200],"for":[98,129,215,252],"SGD":[99],"finite":[101],"learning":[102,243],"rates":[103],"batch":[105,241],"sizes":[106],"underdamped":[109],"Langevin":[110],"equation.":[111],"study":[113],"equation":[115],"setting":[118],"linear":[120],"regression,":[121],"where":[122],"can":[124,247,256],"exact,":[126],"analytic":[127],"expressions":[128],"phase-space":[131],"parameters":[135],"their":[137],"instantaneous":[138],"velocities":[139],"from":[140],"initialization":[141],"stationarity.":[143],"Using":[144],"Fokker-Planck":[146],"equation,":[147],"show":[149],"key":[152],"ingredient":[153],"driving":[154],"these":[155,258],"is":[157],"not":[158],"original":[160],"loss":[162],"but":[163],"rather":[164],"combination":[166],"modified":[169],"loss,":[170],"implicitly":[172],"regularizes":[173],"velocity,":[175],"probability":[177],"currents":[178],"cause":[180],"oscillations":[181],"phase":[183],"space.":[184],"identify":[186],"qualitative":[187],"quantitative":[189],"predictions":[190],"theory":[193],"ResNet-18":[199],"on":[202,236],"ImageNet.":[203],"Through":[204],"lens":[206],"statistical":[208],"physics,":[209],"uncover":[211],"mechanistic":[213],"origin":[214],"SGD.":[226],"Understanding":[227],"SGD,":[232],"its":[234],"dependence":[235],"various":[237],"important":[238],"like":[240],"size,":[242],"rate,":[244],"momentum,":[246],"serve":[248],"basis":[251],"future":[253],"work":[254],"turn":[257],"insights":[259],"into":[260],"algorithmic":[261],"gains.":[262]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
