{"id":"https://openalex.org/W4387813256","doi":"https://doi.org/10.3390/a16100490","title":"Deep Neural Networks Training by Stochastic Quasi-Newton Trust-Region Methods","display_name":"Deep Neural Networks Training by Stochastic Quasi-Newton Trust-Region Methods","publication_year":2023,"publication_date":"2023-10-20","ids":{"openalex":"https://openalex.org/W4387813256","doi":"https://doi.org/10.3390/a16100490"},"language":"en","primary_location":{"id":"doi:10.3390/a16100490","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a16100490","pdf_url":"https://www.mdpi.com/1999-4893/16/10/490/pdf?version=1697792759","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/16/10/490/pdf?version=1697792759","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101748566","display_name":"Mahsa Yousefi","orcid":"https://orcid.org/0000-0002-2937-9654"},"institutions":[{"id":"https://openalex.org/I142444530","display_name":"University of Trieste","ror":"https://ror.org/02n742c10","country_code":"IT","type":"education","lineage":["https://openalex.org/I142444530"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Mahsa Yousefi","raw_affiliation_strings":["Department of Mathematics and Geoscienzes, University of Trieste, 34127 Trieste, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Geoscienzes, University of Trieste, 34127 Trieste, Italy","institution_ids":["https://openalex.org/I142444530"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048616941","display_name":"\u00c1ngeles Mart\u00ednez","orcid":"https://orcid.org/0000-0003-4826-1114"},"institutions":[{"id":"https://openalex.org/I142444530","display_name":"University of Trieste","ror":"https://ror.org/02n742c10","country_code":"IT","type":"education","lineage":["https://openalex.org/I142444530"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"\u00c1ngeles Mart\u00ednez","raw_affiliation_strings":["Department of Mathematics and Geoscienzes, University of Trieste, 34127 Trieste, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Geoscienzes, University of Trieste, 34127 Trieste, Italy","institution_ids":["https://openalex.org/I142444530"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5048616941"],"corresponding_institution_ids":["https://openalex.org/I142444530"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.6913,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.76345559,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"16","issue":"10","first_page":"490","last_page":"490"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hessian-matrix","display_name":"Hessian matrix","score":0.8307251930236816},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.763601541519165},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.527820885181427},{"id":"https://openalex.org/keywords/broyden\u2013fletcher\u2013goldfarb\u2013shanno-algorithm","display_name":"Broyden\u2013Fletcher\u2013Goldfarb\u2013Shanno algorithm","score":0.5273072719573975},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5129804015159607},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5004935264587402},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.46227771043777466},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.45360881090164185},{"id":"https://openalex.org/keywords/quasi-newton-method","display_name":"Quasi-Newton method","score":0.4334879517555237},{"id":"https://openalex.org/keywords/trust-region","display_name":"Trust region","score":0.4119175970554352},{"id":"https://openalex.org/keywords/newtons-method","display_name":"Newton's method","score":0.4037899971008301},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.37202584743499756},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22122415900230408},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.22076985239982605},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.1621498167514801}],"concepts":[{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.8307251930236816},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.763601541519165},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.527820885181427},{"id":"https://openalex.org/C132721684","wikidata":"https://www.wikidata.org/wiki/Q2877013","display_name":"Broyden\u2013Fletcher\u2013Goldfarb\u2013Shanno algorithm","level":3,"score":0.5273072719573975},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5129804015159607},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5004935264587402},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.46227771043777466},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.45360881090164185},{"id":"https://openalex.org/C114954040","wikidata":"https://www.wikidata.org/wiki/Q1476018","display_name":"Quasi-Newton method","level":4,"score":0.4334879517555237},{"id":"https://openalex.org/C89109886","wikidata":"https://www.wikidata.org/wiki/Q1535924","display_name":"Trust region","level":3,"score":0.4119175970554352},{"id":"https://openalex.org/C85189116","wikidata":"https://www.wikidata.org/wiki/Q374195","display_name":"Newton's method","level":3,"score":0.4037899971008301},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37202584743499756},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22122415900230408},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.22076985239982605},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.1621498167514801},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C178635117","wikidata":"https://www.wikidata.org/wiki/Q747499","display_name":"RADIUS","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/a16100490","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a16100490","pdf_url":"https://www.mdpi.com/1999-4893/16/10/490/pdf?version=1697792759","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:4dc7ac0872134310ab5fe225610efb37","is_oa":true,"landing_page_url":"https://doaj.org/article/4dc7ac0872134310ab5fe225610efb37","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 16, Iss 10, p 490 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/a16100490","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a16100490","pdf_url":"https://www.mdpi.com/1999-4893/16/10/490/pdf?version=1697792759","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2020143847","display_name":null,"funder_award_id":"E53C22001930001","funder_id":"https://openalex.org/F4320334079","funder_display_name":"Gruppo Nazionale per il Calcolo Scientifico"},{"id":"https://openalex.org/G3918974238","display_name":null,"funder_award_id":"CUP_E53C22001930001","funder_id":"https://openalex.org/F4320334079","funder_display_name":"Gruppo Nazionale per il Calcolo Scientifico"},{"id":"https://openalex.org/G5456286744","display_name":null,"funder_award_id":"E53C22001930001","funder_id":"https://openalex.org/F4320311030","funder_display_name":"Istituto Nazionale di Alta Matematica \"Francesco Severi\""},{"id":"https://openalex.org/G6559942121","display_name":null,"funder_award_id":"CUP_E53C22001930001","funder_id":"https://openalex.org/F4320311030","funder_display_name":"Istituto Nazionale di Alta Matematica \"Francesco Severi\""},{"id":"https://openalex.org/G7855521421","display_name":null,"funder_award_id":"ECS_00000043","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320311030","display_name":"Istituto Nazionale di Alta Matematica \"Francesco Severi\"","ror":"https://ror.org/01vx64p53"},{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320334079","display_name":"Gruppo Nazionale per il Calcolo Scientifico","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387813256.pdf"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W147998453","https://openalex.org/W242703491","https://openalex.org/W1491622225","https://openalex.org/W1592294486","https://openalex.org/W1836465849","https://openalex.org/W1942704184","https://openalex.org/W1994616650","https://openalex.org/W2000389939","https://openalex.org/W2042173174","https://openalex.org/W2075184363","https://openalex.org/W2102486516","https://openalex.org/W2107438106","https://openalex.org/W2112796928","https://openalex.org/W2135482703","https://openalex.org/W2146502635","https://openalex.org/W2194775991","https://openalex.org/W2316266851","https://openalex.org/W2748050758","https://openalex.org/W2786910476","https://openalex.org/W2811026747","https://openalex.org/W2910666328","https://openalex.org/W2943933428","https://openalex.org/W2950311990","https://openalex.org/W2963156201","https://openalex.org/W2963160732","https://openalex.org/W2963397933","https://openalex.org/W2963433607","https://openalex.org/W2963465983","https://openalex.org/W2963941964","https://openalex.org/W2964303576","https://openalex.org/W2970227300","https://openalex.org/W3003775594","https://openalex.org/W3030916542","https://openalex.org/W3035182906","https://openalex.org/W3118608800","https://openalex.org/W3127411449","https://openalex.org/W3206876740","https://openalex.org/W4297782689","https://openalex.org/W6608133726","https://openalex.org/W6631190155","https://openalex.org/W6635643970","https://openalex.org/W6675672627","https://openalex.org/W6676105031","https://openalex.org/W6681435938","https://openalex.org/W6713121291","https://openalex.org/W6779861650"],"related_works":["https://openalex.org/W2026245502","https://openalex.org/W4318192194","https://openalex.org/W2070932288","https://openalex.org/W4221162014","https://openalex.org/W2937637171","https://openalex.org/W1964096161","https://openalex.org/W2572889128","https://openalex.org/W3091576739","https://openalex.org/W2892976100","https://openalex.org/W2958624776"],"abstract_inverted_index":{"While":[0],"first-order":[1,188],"methods":[2,34],"are":[3,36,177],"popular":[4],"for":[5,57,116],"solving":[6],"optimization":[7],"problems":[8],"arising":[9],"in":[10,28,90,130,136,180],"deep":[11,59,137],"learning,":[12],"they":[13],"come":[14],"with":[15,192],"some":[16,181],"acute":[17],"deficiencies.":[18],"To":[19],"overcome":[20],"these":[21],"shortcomings,":[22],"there":[23],"has":[24],"been":[25],"recent":[26],"interest":[27],"introducing":[29],"second-order":[30,203],"information":[31],"through":[32],"quasi-Newton":[33,55,66,175],"that":[35,150,173],"able":[37,183],"to":[38,122,184],"construct":[39],"Hessian":[40,118],"approximations":[41],"using":[42,104],"only":[43],"gradient":[44],"information.":[45],"In":[46],"this":[47],"work,":[48],"we":[49],"study":[50,79,149],"the":[51,68,73,84,91,105,111,125,131,143,159,162,167,186,193,201],"performance":[52,86],"of":[53,87,145,196],"stochastic":[54,174,202],"algorithms":[56,176],"training":[58,99],"neural":[60],"networks.":[61],"We":[62,139],"consider":[63],"two":[64],"well-known":[65,187],"updates,":[67],"limited-memory":[69],"Broyden\u2013Fletcher\u2013Goldfarb\u2013Shanno":[70],"(BFGS)":[71],"and":[72,94,141,200],"symmetric":[74],"rank":[75],"one":[76],"(SR1).":[77],"This":[78],"fills":[80],"a":[81],"gap":[82],"concerning":[83],"real":[85],"both":[88],"updates":[89],"minibatch":[92],"setting":[93],"analyzes":[95],"whether":[96],"more":[97,106],"efficient":[98,178],"can":[100],"be":[101],"obtained":[102],"when":[103],"robust":[107],"BFGS":[108],"update":[109],"or":[110,166],"cheaper":[112],"SR1":[113],"formula,":[114],"which\u2014allowing":[115],"indefinite":[117],"approximations\u2014can":[119],"potentially":[120],"help":[121],"better":[123],"navigate":[124],"pathological":[126],"saddle":[127],"points":[128],"present":[129,140],"non-convex":[132],"loss":[133],"functions":[134],"found":[135],"learning.":[138],"discuss":[142],"results":[144,171],"an":[146],"extensive":[147],"experimental":[148],"includes":[151],"many":[152],"aspects":[153],"affecting":[154],"performance,":[155],"like":[156],"batch":[157,168],"normalization,":[158],"network":[160],"architecture,":[161],"limited":[163],"memory":[164],"parameter":[165],"size.":[169],"Our":[170],"show":[172],"and,":[179],"instances,":[182],"outperform":[185],"Adam":[189],"optimizer,":[190],"run":[191],"optimal":[194],"combination":[195],"its":[197],"numerous":[198],"hyperparameters,":[199],"trust-region":[204],"STORM":[205],"algorithm.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
