{"id":"https://openalex.org/W3030916542","doi":"https://doi.org/10.1137/1.9781611976236.23","title":"Second-order Optimization for Non-convex Machine Learning: an Empirical Study","display_name":"Second-order Optimization for Non-convex Machine Learning: an Empirical Study","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3030916542","doi":"https://doi.org/10.1137/1.9781611976236.23","mag":"3030916542"},"language":"en","primary_location":{"id":"doi:10.1137/1.9781611976236.23","is_oa":true,"landing_page_url":"https://doi.org/10.1137/1.9781611976236.23","pdf_url":"https://epubs.siam.org/doi/pdf/10.1137/1.9781611976236.23","source":{"id":"https://openalex.org/S4306463922","display_name":"Society for Industrial and Applied Mathematics eBooks","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"ebook platform"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 SIAM International Conference on Data Mining","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://epubs.siam.org/doi/pdf/10.1137/1.9781611976236.23","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077532535","display_name":"Peng Xu","orcid":"https://orcid.org/0000-0003-3399-9722"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Peng Xu","raw_affiliation_strings":["Institute for Computational and Mathematical Engineering, Stanford University,"],"affiliations":[{"raw_affiliation_string":"Institute for Computational and Mathematical Engineering, Stanford University,","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056884940","display_name":"Fred Roosta","orcid":"https://orcid.org/0000-0002-6920-7072"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]},{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]}],"countries":["AU","US"],"is_corresponding":false,"raw_author_name":"Fred Roosta","raw_affiliation_strings":["School of Mathematics and Physics, University of Queens-land, Brisbane, Australia, and International Computer Science Institute, Berkeley, USA,"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Physics, University of Queens-land, Brisbane, Australia, and International Computer Science Institute, Berkeley, USA,","institution_ids":["https://openalex.org/I1297971548","https://openalex.org/I165143802"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033006662","display_name":"Michael W. Mahoney","orcid":"https://orcid.org/0000-0001-7920-4652"},"institutions":[{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael W. Mahoney","raw_affiliation_strings":["International Computer Science Institute and Department of Statistics, University of California at Berkeley,"],"affiliations":[{"raw_affiliation_string":"International Computer Science Institute and Department of Statistics, University of California at Berkeley,","institution_ids":["https://openalex.org/I1297971548"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5077532535"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":19.7996,"has_fulltext":true,"cited_by_count":105,"citation_normalized_percentile":{"value":0.99478433,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"199","last_page":"207"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10963","display_name":"Advanced Optimization Algorithms Research","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/2612","display_name":"Numerical Analysis"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/saddle-point","display_name":"Saddle point","score":0.6971635222434998},{"id":"https://openalex.org/keywords/curvature","display_name":"Curvature","score":0.6607752442359924},{"id":"https://openalex.org/keywords/regular-polygon","display_name":"Regular polygon","score":0.5728303790092468},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5141096711158752},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5037054419517517},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.48002997040748596},{"id":"https://openalex.org/keywords/convex-function","display_name":"Convex function","score":0.4468994438648224},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4412948787212372},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.4222484827041626},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.41302645206451416},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4109414219856262},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40242674946784973},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.39824461936950684},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.36977922916412354},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3531957268714905},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.09150561690330505},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.0853147804737091},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08126085996627808}],"concepts":[{"id":"https://openalex.org/C2681867","wikidata":"https://www.wikidata.org/wiki/Q690935","display_name":"Saddle point","level":2,"score":0.6971635222434998},{"id":"https://openalex.org/C195065555","wikidata":"https://www.wikidata.org/wiki/Q214881","display_name":"Curvature","level":2,"score":0.6607752442359924},{"id":"https://openalex.org/C112680207","wikidata":"https://www.wikidata.org/wiki/Q714886","display_name":"Regular polygon","level":2,"score":0.5728303790092468},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5141096711158752},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5037054419517517},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48002997040748596},{"id":"https://openalex.org/C145446738","wikidata":"https://www.wikidata.org/wiki/Q319913","display_name":"Convex function","level":3,"score":0.4468994438648224},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4412948787212372},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.4222484827041626},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.41302645206451416},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4109414219856262},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40242674946784973},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.39824461936950684},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.36977922916412354},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3531957268714905},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.09150561690330505},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0853147804737091},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08126085996627808},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1137/1.9781611976236.23","is_oa":true,"landing_page_url":"https://doi.org/10.1137/1.9781611976236.23","pdf_url":"https://epubs.siam.org/doi/pdf/10.1137/1.9781611976236.23","source":{"id":"https://openalex.org/S4306463922","display_name":"Society for Industrial and Applied Mathematics eBooks","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"ebook platform"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 SIAM International Conference on Data Mining","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.1137/1.9781611976236.23","is_oa":true,"landing_page_url":"https://doi.org/10.1137/1.9781611976236.23","pdf_url":"https://epubs.siam.org/doi/pdf/10.1137/1.9781611976236.23","source":{"id":"https://openalex.org/S4306463922","display_name":"Society for Industrial and Applied Mathematics eBooks","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"ebook platform"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 SIAM International Conference on Data Mining","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4588159538","display_name":null,"funder_award_id":"CE140100049","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"},{"id":"https://openalex.org/G4713059963","display_name":null,"funder_award_id":"FA8750","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G7316068524","display_name":null,"funder_award_id":"CE14010004","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"},{"id":"https://openalex.org/G7403545485","display_name":null,"funder_award_id":"DE180100923","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"}],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3030916542.pdf","grobid_xml":"https://content.openalex.org/works/W3030916542.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W104184427","https://openalex.org/W196761320","https://openalex.org/W607505555","https://openalex.org/W1569098853","https://openalex.org/W1899249567","https://openalex.org/W1994974865","https://openalex.org/W2006903949","https://openalex.org/W2026079992","https://openalex.org/W2051434435","https://openalex.org/W2052816682","https://openalex.org/W2146989110","https://openalex.org/W2156005216","https://openalex.org/W2194775991","https://openalex.org/W2467074172","https://openalex.org/W2524032428","https://openalex.org/W2592651140","https://openalex.org/W2615262195","https://openalex.org/W2900789157","https://openalex.org/W2963307318","https://openalex.org/W2963586744","https://openalex.org/W3118608800","https://openalex.org/W3120421331","https://openalex.org/W4298544744","https://openalex.org/W4301014524"],"related_works":["https://openalex.org/W3094031457","https://openalex.org/W3172664294","https://openalex.org/W2294590153","https://openalex.org/W4220659530","https://openalex.org/W4292651679","https://openalex.org/W2400034325","https://openalex.org/W2499557797","https://openalex.org/W2985014567","https://openalex.org/W4299515259","https://openalex.org/W2032676126"],"abstract_inverted_index":{"While":[0],"first-order":[1],"optimization":[2],"methods,":[3,95],"such":[4,27,54],"as":[5,28,55],"SGD":[6,130],"are":[7,47,142],"popular":[8],"in":[9,38,50,58,69,154],"machine":[10],"learning":[11,29],"(ML),":[12],"they":[13,141],"come":[14],"with":[15,106,128,131],"well-known":[16],"deficiencies,":[17],"including":[18],"relatively-slow":[19],"convergence,":[20],"sensitivity":[21],"to":[22,74,145,164],"the":[23,152],"settings":[24,53],"of":[25,90,93,99],"hyper-parameters":[26],"rate,":[30],"stagnation":[31],"at":[32],"high":[33],"training":[34],"errors,":[35],"and":[36,42,103,169],"difficulty":[37],"escaping":[39],"flat":[40,167],"regions":[41,168],"saddle":[43,170],"points.":[44,171],"These":[45],"issues":[46],"particularly":[48],"acute":[49],"highly":[51,143],"non-convex":[52,111],"those":[56],"arising":[57],"neural":[59],"networks.":[60],"Motivated":[61],"by":[62,78],"this,":[63],"there":[64],"has":[65],"been":[66],"recent":[67],"interest":[68],"second-order":[70],"methods":[71,121,158],"that":[72,119,151],"aim":[73],"alleviate":[75],"these":[76,120,156],"shortcomings":[77],"capturing":[79],"curvature":[80,160],"information.":[81],"In":[82,114],"this":[83],"paper,":[84],"we":[85,117,149],"report":[86],"detailed":[87],"empirical":[88],"evaluations":[89],"a":[91],"class":[92],"Newton-type":[94,157],"namely":[96],"sub-sampled":[97],"variants":[98],"trust":[100],"region":[101],"(TR)":[102],"adaptive":[104],"regularization":[105],"cubics":[107],"(ARC)":[108],"algorithms,":[109],"for":[110],"ML":[112],"problems.":[113],"doing":[115],"so,":[116],"demonstrate":[118],"not":[122],"only":[123],"can":[124],"be":[125],"computationally":[126],"competitive":[127],"hand-tuned":[129],"momentum,":[132],"obtaining":[133],"comparable":[134],"or":[135],"better":[136],"generalization":[137],"performance,":[138],"but":[139],"also":[140],"robust":[144],"hyper-parameter":[146],"settings.":[147],"Further,":[148],"show":[150],"manner":[153],"which":[155],"employ":[159],"information":[161],"allows":[162],"them":[163],"seamlessly":[165],"escape":[166]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":22},{"year":2021,"cited_by_count":24},{"year":2020,"cited_by_count":14},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
