{"id":"https://openalex.org/W7154951950","doi":"https://doi.org/10.48550/arxiv.2604.15554","title":"Natural gradient descent with momentum","display_name":"Natural gradient descent with momentum","publication_year":2026,"publication_date":"2026-04-16","ids":{"openalex":"https://openalex.org/W7154951950","doi":"https://doi.org/10.48550/arxiv.2604.15554"},"language":"en","primary_location":{"id":"pmh:oai:HAL:hal-05654260v1","is_oa":false,"landing_page_url":"https://hal.science/hal-05654260","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2026","raw_type":"info:eu-repo/semantics/preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.15554","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134053515","display_name":"Anthony Nouy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nouy, Anthony","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134024886","display_name":"Agust\u00edn Somacal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Somacal, Agust\u00edn","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.6078000068664551,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.6078000068664551,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.20260000228881836,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.04699999839067459,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.7631999850273132},{"id":"https://openalex.org/keywords/differentiable-function","display_name":"Differentiable function","score":0.54830002784729},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.5069000124931335},{"id":"https://openalex.org/keywords/nonlinear-conjugate-gradient-method","display_name":"Nonlinear conjugate gradient method","score":0.49970000982284546},{"id":"https://openalex.org/keywords/function-space","display_name":"Function space","score":0.45910000801086426},{"id":"https://openalex.org/keywords/tangent-space","display_name":"Tangent space","score":0.45210000872612},{"id":"https://openalex.org/keywords/manifold","display_name":"Manifold (fluid mechanics)","score":0.4341000020503998},{"id":"https://openalex.org/keywords/tangent","display_name":"Tangent","score":0.42170000076293945},{"id":"https://openalex.org/keywords/tangent-stiffness-matrix","display_name":"Tangent stiffness matrix","score":0.4207000136375427}],"concepts":[{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.7631999850273132},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6726999878883362},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.5490000247955322},{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.54830002784729},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.5069000124931335},{"id":"https://openalex.org/C26362088","wikidata":"https://www.wikidata.org/wiki/Q17086453","display_name":"Nonlinear conjugate gradient method","level":4,"score":0.49970000982284546},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.4862000048160553},{"id":"https://openalex.org/C142730499","wikidata":"https://www.wikidata.org/wiki/Q934367","display_name":"Function space","level":2,"score":0.45910000801086426},{"id":"https://openalex.org/C157157409","wikidata":"https://www.wikidata.org/wiki/Q909601","display_name":"Tangent space","level":2,"score":0.45210000872612},{"id":"https://openalex.org/C529865628","wikidata":"https://www.wikidata.org/wiki/Q1790740","display_name":"Manifold (fluid mechanics)","level":2,"score":0.4341000020503998},{"id":"https://openalex.org/C138187205","wikidata":"https://www.wikidata.org/wiki/Q131251","display_name":"Tangent","level":2,"score":0.42170000076293945},{"id":"https://openalex.org/C178295079","wikidata":"https://www.wikidata.org/wiki/Q919765","display_name":"Tangent stiffness matrix","level":4,"score":0.4207000136375427},{"id":"https://openalex.org/C149944404","wikidata":"https://www.wikidata.org/wiki/Q2392464","display_name":"Tangent cone","level":3,"score":0.4016000032424927},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.3961000144481659},{"id":"https://openalex.org/C47890412","wikidata":"https://www.wikidata.org/wiki/Q1179296","display_name":"Tangent vector","level":3,"score":0.3912999927997589},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.3684000074863434},{"id":"https://openalex.org/C167879884","wikidata":"https://www.wikidata.org/wiki/Q727568","display_name":"Balanced flow","level":2,"score":0.36320000886917114},{"id":"https://openalex.org/C192939610","wikidata":"https://www.wikidata.org/wiki/Q188444","display_name":"Differential geometry","level":2,"score":0.3601999878883362},{"id":"https://openalex.org/C191795146","wikidata":"https://www.wikidata.org/wiki/Q3878446","display_name":"Norm (philosophy)","level":2,"score":0.35569998621940613},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.35440000891685486},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.3427000045776367},{"id":"https://openalex.org/C38365724","wikidata":"https://www.wikidata.org/wiki/Q4677469","display_name":"Activation function","level":3,"score":0.3255999982357025},{"id":"https://openalex.org/C93779851","wikidata":"https://www.wikidata.org/wiki/Q271977","display_name":"Partial differential equation","level":2,"score":0.323199987411499},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.30630001425743103},{"id":"https://openalex.org/C2011187","wikidata":"https://www.wikidata.org/wiki/Q383851","display_name":"Directional derivative","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C202286095","wikidata":"https://www.wikidata.org/wiki/Q579262","display_name":"Error function","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C2777021972","wikidata":"https://www.wikidata.org/wiki/Q22976830","display_name":"Uniqueness","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C158847443","wikidata":"https://www.wikidata.org/wiki/Q1997812","display_name":"Method of steepest descent","level":2,"score":0.2590000033378601},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:HAL:hal-05654260v1","is_oa":false,"landing_page_url":"https://hal.science/hal-05654260","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2026","raw_type":"info:eu-repo/semantics/preprint"},{"id":"doi:10.48550/arxiv.2604.15554","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15554","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.15554","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15554","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"consider":[1],"the":[2,37,54,76,78,82,86,90,94,118,122,139,147,155,164,176,207],"problem":[3],"of":[4,11,39,75,81,85,163,166,192],"approximating":[5],"a":[6,12,17,40,47,60,64,70,100,106,114,143,161,167,189],"function":[7,42,111,149],"by":[8,59],"an":[9],"element":[10],"nonlinear":[13,144,213],"manifold":[14,92,145],"which":[15],"admits":[16],"differentiable":[18,26],"parametrization,":[19],"typical":[20],"examples":[21],"being":[22],"neural":[23],"networks":[24],"with":[25,97,212],"activation":[27],"functions":[28],"or":[29,146,160,199],"tensor":[30],"networks.":[31],"Natural":[32],"gradient":[33,49,116,126,129,178],"descent":[34,50,130],"(NGD)":[35],"for":[36,157],"optimization":[38],"loss":[41,148],"can":[43,205],"be":[44],"seen":[45],"as":[46],"preconditioned":[48],"where":[51],"updates":[52],"in":[53,110,134,171],"parameter":[55],"space":[56,88,120],"are":[57],"driven":[58],"functional":[61],"perspective.":[62],"In":[63],"spirit":[65],"similar":[66],"to":[67,89,99,105,121],"Newton's":[68],"method,":[69],"NGD":[71],"step":[72],"uses,":[73],"instead":[74],"Hessian,":[77],"Gram":[79],"matrix":[80],"generating":[83],"system":[84],"tangent":[87,119],"approximation":[91],"at":[93,183],"current":[95],"iterate,":[96],"respect":[98],"suitable":[101],"metric.":[102],"This":[103,186],"corresponds":[104],"locally":[107],"optimal":[108],"update":[109],"space,":[112],"following":[113],"projected":[115],"onto":[117],"manifold.":[123],"Still,":[124],"both":[125],"and":[127,201],"natural":[128,177,190],"methods":[131,196],"get":[132],"stuck":[133],"local":[135],"minima.":[136],"Furthermore,":[137],"when":[138,210],"model":[140,214],"class":[141],"is":[142,150],"not":[151],"ideally":[152],"conditioned":[153],"(e.g.,":[154],"KL-divergence":[156],"density":[158],"estimation,":[159],"norm":[162],"residual":[165],"partial":[168],"differential":[169],"equation":[170],"physics":[172],"informed":[173],"learning),":[174],"even":[175],"might":[179],"yield":[180],"non-optimal":[181],"directions":[182],"each":[184],"step.":[185],"work":[187],"introduces":[188],"version":[191],"classical":[193],"inertial":[194],"dynamic":[195],"like":[197],"Heavy-Ball":[198],"Nesterov":[200],"show":[202],"how":[203],"it":[204],"improve":[206],"learning":[208],"process":[209],"working":[211],"classes.":[215]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-04-21T00:00:00"}
