{"id":"https://openalex.org/W7161036926","doi":"https://doi.org/10.48550/arxiv.2605.11316","title":"Error whitening: Why Gauss-Newton outperforms Newton","display_name":"Error whitening: Why Gauss-Newton outperforms Newton","publication_year":2026,"publication_date":"2026-05-11","ids":{"openalex":"https://openalex.org/W7161036926","doi":"https://doi.org/10.48550/arxiv.2605.11316"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.11316","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11316","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.11316","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054396961","display_name":"Maricela Best McKay","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McKay, Maricela Best","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022622508","display_name":"Nathan P. Lawrence","orcid":"https://orcid.org/0000-0002-7147-0048"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lawrence, Nathan P.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136067299","display_name":"Brian Wetton","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wetton, Brian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122876389","display_name":"R. Bhushan Gopaluni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gopaluni, R. Bhushan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9415000081062317,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9415000081062317,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.01489999983459711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.008700000122189522,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/jacobian-matrix-and-determinant","display_name":"Jacobian matrix and determinant","score":0.7013000249862671},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.6061999797821045},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.5421000123023987},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5418000221252441},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.48590001463890076},{"id":"https://openalex.org/keywords/tangent-space","display_name":"Tangent space","score":0.42750000953674316},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.41760000586509705},{"id":"https://openalex.org/keywords/tangent","display_name":"Tangent","score":0.39259999990463257}],"concepts":[{"id":"https://openalex.org/C200331156","wikidata":"https://www.wikidata.org/wiki/Q506041","display_name":"Jacobian matrix and determinant","level":2,"score":0.7013000249862671},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6665999889373779},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.6061999797821045},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.5421000123023987},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5418000221252441},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.5051000118255615},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.48590001463890076},{"id":"https://openalex.org/C157157409","wikidata":"https://www.wikidata.org/wiki/Q909601","display_name":"Tangent space","level":2,"score":0.42750000953674316},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4262000024318695},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.41760000586509705},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.40529999136924744},{"id":"https://openalex.org/C138187205","wikidata":"https://www.wikidata.org/wiki/Q131251","display_name":"Tangent","level":2,"score":0.39259999990463257},{"id":"https://openalex.org/C202286095","wikidata":"https://www.wikidata.org/wiki/Q579262","display_name":"Error function","level":2,"score":0.3467000126838684},{"id":"https://openalex.org/C142730499","wikidata":"https://www.wikidata.org/wiki/Q934367","display_name":"Function space","level":2,"score":0.32089999318122864},{"id":"https://openalex.org/C9936470","wikidata":"https://www.wikidata.org/wiki/Q6510405","display_name":"Least-squares function approximation","level":3,"score":0.320499986410141},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.31929999589920044},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.3147999942302704},{"id":"https://openalex.org/C178295079","wikidata":"https://www.wikidata.org/wiki/Q919765","display_name":"Tangent stiffness matrix","level":4,"score":0.29760000109672546},{"id":"https://openalex.org/C56275529","wikidata":"https://www.wikidata.org/wiki/Q5348937","display_name":"Eight-point algorithm","level":5,"score":0.29019999504089355},{"id":"https://openalex.org/C122383733","wikidata":"https://www.wikidata.org/wiki/Q865920","display_name":"Approximation error","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C49712288","wikidata":"https://www.wikidata.org/wiki/Q77601250","display_name":"Positive-definite matrix","level":3,"score":0.2775000035762787},{"id":"https://openalex.org/C179127668","wikidata":"https://www.wikidata.org/wiki/Q17086396","display_name":"Newton's method in optimization","level":4,"score":0.275299996137619},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.2651999890804291},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C77246614","wikidata":"https://www.wikidata.org/wiki/Q1409400","display_name":"Gramian matrix","level":3,"score":0.2623000144958496}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.11316","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11316","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.11316","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11316","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,119],"Gauss-Newton":[1,20,40,65,166,179],"matrix":[2,42,66,104],"is":[3,108,136,161],"widely":[4],"viewed":[5],"as":[6],"a":[7,27,56,162],"positive":[8],"semidefinite":[9],"approximation":[10],"of":[11,93,111,149,165],"the":[12,38,44,51,62,72,87,91,94,99,103,109,112,125,134,138,147,150,153,157,182],"Hessian,":[13],"yet":[14],"mounting":[15],"empirical":[16],"evidence":[17],"shows":[18],"that":[19,37,168,178],"descent":[21,167],"outperforms":[22,189],"Newton's":[23,173,190],"method.":[24,174],"We":[25,35,127,175],"adopt":[26],"function":[28,48,73,185],"space":[29,49,74,186],"perspective":[30],"to":[31,67,116,143],"analyze":[32],"this":[33,78,129],"phenomenon.":[34],"show":[36],"generalized":[39],"(GGN)":[41],"projects":[43,71],"Newton":[45],"direction":[46],"in":[47],"onto":[50,77],"model's":[52,88,100],"tangent":[53,80],"space,":[54],"while":[55],"Jacobian-only":[57],"variant":[58],"obtained":[59],"by":[60,146,156],"applying":[61],"least":[63],"squares":[64,69],"non-least":[68],"losses":[70],"loss":[75,151],"gradient":[76],"same":[79],"space.":[81],"Both":[82],"projections":[83,120],"eliminate":[84],"distortions":[85],"from":[86,172],"parameterization.":[89],"Specifically,":[90],"evolution":[92],"prediction-target":[95,139],"mismatch":[96,140],"depends":[97],"on":[98],"parameterization":[101,135],"through":[102],"$JJ^\\top$":[105,123],"where":[106],"$J$":[107],"Jacobian":[110],"model":[113],"with":[114,124],"respect":[115],"its":[117],"parameters.":[118],"effectively":[121],"replace":[122],"identity.":[126],"call":[128],"effect":[130],"error":[131],"whitening.":[132],"Once":[133],"removed,":[137],"evolves":[141],"according":[142],"dynamics":[144,187],"dictated":[145],"structure":[148],"and":[152,188,193,204],"projection":[154],"produced":[155],"optimizer.":[158],"Error":[159],"whitening":[160],"special":[163],"property":[164],"rigorously":[169],"distinguishes":[170],"it":[171],"empirically":[176],"demonstrate":[177],"optimizers":[180],"follow":[181],"theoretically":[183],"predicted":[184],"method,":[191],"Adam,":[192],"Muon":[194],"across":[195],"case":[196],"studies":[197],"spanning":[198],"supervised":[199],"learning,":[200,203],"physics-informed":[201],"deep":[202],"approximate":[205],"dynamic":[206],"programming.":[207]},"counts_by_year":[],"updated_date":"2026-07-01T08:55:40.977307","created_date":"2026-05-14T00:00:00"}
