{"id":"https://openalex.org/W7160935524","doi":"https://doi.org/10.48550/arxiv.2605.08352","title":"Convergence Analysis of Newton's Method for Neural Networks in the Overparameterized Limit","display_name":"Convergence Analysis of Newton's Method for Neural Networks in the Overparameterized Limit","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160935524","doi":"https://doi.org/10.48550/arxiv.2605.08352"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.08352","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08352","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.08352","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088292594","display_name":"Konstantin Riedl","orcid":"https://orcid.org/0000-0002-2206-4334"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Riedl, Konstantin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135965142","display_name":"Konstantinos Spiliopoulos","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Spiliopoulos, Konstantinos","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135985083","display_name":"Justin Sirignano","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sirignano, Justin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.76419997215271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.76419997215271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.042500000447034836,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.040699999779462814,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hessian-matrix","display_name":"Hessian matrix","score":0.8101000189781189},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.5067999958992004},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.5016000270843506},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4934999942779541},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.49050000309944153},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.47780001163482666},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.4724000096321106},{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.46059998869895935},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.45350000262260437}],"concepts":[{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.8101000189781189},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6996999979019165},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.5845999717712402},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.5067999958992004},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.5016000270843506},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4934999942779541},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.49050000309944153},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.47780001163482666},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.4724000096321106},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.46059998869895935},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.45350000262260437},{"id":"https://openalex.org/C130367717","wikidata":"https://www.wikidata.org/wiki/Q189791","display_name":"Diagonal","level":2,"score":0.38929998874664307},{"id":"https://openalex.org/C84545080","wikidata":"https://www.wikidata.org/wiki/Q1147936","display_name":"Condition number","level":3,"score":0.3513999879360199},{"id":"https://openalex.org/C90377204","wikidata":"https://www.wikidata.org/wiki/Q1052594","display_name":"Uniform boundedness","level":3,"score":0.3379000127315521},{"id":"https://openalex.org/C138187205","wikidata":"https://www.wikidata.org/wiki/Q131251","display_name":"Tangent","level":2,"score":0.3255999982357025},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.3138999938964844},{"id":"https://openalex.org/C152442038","wikidata":"https://www.wikidata.org/wiki/Q2778212","display_name":"Tikhonov regularization","level":3,"score":0.3003999888896942},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C145446738","wikidata":"https://www.wikidata.org/wiki/Q319913","display_name":"Convex function","level":3,"score":0.290800005197525},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C31441030","wikidata":"https://www.wikidata.org/wiki/Q4291882","display_name":"Least absolute deviations","level":3,"score":0.27480000257492065},{"id":"https://openalex.org/C117898588","wikidata":"https://www.wikidata.org/wiki/Q6664310","display_name":"Local convergence","level":3,"score":0.2732999920845032},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.27239999175071716},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.2623000144958496},{"id":"https://openalex.org/C48216909","wikidata":"https://www.wikidata.org/wiki/Q6889156","display_name":"Modes of convergence (annotated index)","level":5,"score":0.26019999384880066},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.25619998574256897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.08352","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08352","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.08352","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08352","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0],"convergence":[1,54,85,114,209],"analysis":[2,158,216],"is":[3,86,134],"developed":[4],"for":[5,10,105,115,144,221,246,265],"the":[6,16,20,28,36,59,65,71,89,93,103,123,131,160,165,175,178,190,197,201,208,211,223,236,253,262,274,281],"regularized":[7,254],"Newton":[8,166,263],"method":[9,139,167],"training":[11,30,198,260],"neural":[12,46],"networks":[13],"(NNs)":[14],"in":[15,33,58,97,156,200],"overparameterized":[17,202],"limit.":[18],"As":[19],"number":[21,237],"of":[22,38,102,164,180,184,210,238,250],"hidden":[23,239,251],"units":[24,240],"tends":[25,186],"to":[26,35,70,112,140,153,187,270],"infinity,":[27],"NN":[29,66,191,267],"dynamics":[31,199,213],"converge":[32,141,269],"probability":[34],"solution":[37],"a":[39,44,75,169,218,232,278],"deterministic":[40],"limit":[41,203],"equation":[42],"involving":[43],"``Newton":[45],"tangent":[47],"kernel''":[48],"(NNTK).":[49],"Explicit":[50],"rates":[51],"characterizing":[52],"this":[53,84,181],"are":[55],"provided":[56],"and,":[57],"infinite-width":[60],"limit,":[61],"we":[62,227],"prove":[63,244],"that":[64,83,151,177,273],"converges":[67],"exponentially":[68],"fast":[69],"target":[72,116],"data":[73,117,145],"(i.e.,":[74],"global":[76],"minimizer":[77],"with":[78,118,146,168],"zero":[79],"loss).":[80],"We":[81,243],"show":[82,228],"uniform":[87],"across":[88],"frequency":[90],"spectrum,":[91],"addressing":[92],"spectral":[94],"bias":[95],"inherent":[96],"gradient":[98,106],"descent.":[99],"The":[100,215],"eigenvalues":[101,129],"NTK":[104],"descent":[107],"accumulate":[108],"at":[109,231],"zero,":[110,271],"leading":[111],"slow":[113],"high-frequency":[119,147],"components.":[120,148],"In":[121],"contrast,":[122],"NNTK":[124],"has":[125],"uniformly":[126],"lower":[127],"bounded":[128],"if":[130],"regularization":[132,224],"parameter":[133,162],"selected":[135],"appropriately,":[136],"allowing":[137],"Newton's":[138],"more":[142],"quickly":[143],"Mathematical":[149],"challenges":[150],"need":[152],"be":[154],"addressed":[155],"our":[157],"include":[159],"implicit":[161],"update":[163],"potentially":[170],"indefinite":[171],"Hessian":[172,255],"matrix":[173],"and":[174,261],"fact":[176],"dimension":[179],"linear":[182],"system":[183],"equations":[185],"infinity":[188],"as":[189,204,206,235,277],"width":[192],"grows.":[193],"This":[194],"complicates":[195],"deriving":[196],"well":[205],"proving":[207],"finite-width":[212],"thereto.":[214],"identifies":[217],"scaling":[219],"formula":[220],"selecting":[222],"parameter,":[225],"which":[226],"can":[229],"vanish":[230],"suitable":[233],"rate":[234],"becomes":[241],"larger.":[242],"that,":[245],"sufficiently":[247],"large":[248],"numbers":[249],"units,":[252],"remains":[256],"positive":[257],"definite":[258],"during":[259],"updates":[264],"individual":[266],"parameters":[268],"showing":[272],"model":[275],"behaves":[276],"linearization":[279],"around":[280],"initialization.":[282]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
