{"id":"https://openalex.org/W3201675503","doi":"https://doi.org/10.1109/ijcnn52387.2021.9534194","title":"Learning the Step-size Policy for the Limited-Memory Broyden-Fletcher-Goldfarb-Shanno Algorithm","display_name":"Learning the Step-size Policy for the Limited-Memory Broyden-Fletcher-Goldfarb-Shanno Algorithm","publication_year":2021,"publication_date":"2021-07-18","ids":{"openalex":"https://openalex.org/W3201675503","doi":"https://doi.org/10.1109/ijcnn52387.2021.9534194","mag":"3201675503"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn52387.2021.9534194","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534194","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075202398","display_name":"Lucas N. Egidio","orcid":"https://orcid.org/0000-0003-4096-5969"},"institutions":[{"id":"https://openalex.org/I95674353","display_name":"UCLouvain","ror":"https://ror.org/02495e989","country_code":"BE","type":"education","lineage":["https://openalex.org/I95674353"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Lucas N. Egidio","raw_affiliation_strings":["ICTEAM/INMA, Universit\u00e9 Catholique de Louvain, Belgium"],"affiliations":[{"raw_affiliation_string":"ICTEAM/INMA, Universit\u00e9 Catholique de Louvain, Belgium","institution_ids":["https://openalex.org/I95674353"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017570497","display_name":"Anders Hansson","orcid":"https://orcid.org/0000-0002-7934-6009"},"institutions":[{"id":"https://openalex.org/I102134673","display_name":"Link\u00f6ping University","ror":"https://ror.org/05ynxx418","country_code":"SE","type":"education","lineage":["https://openalex.org/I102134673"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Anders Hansson","raw_affiliation_strings":["Link\u00f6ping University, Link\u00f6ping, Sweden"],"affiliations":[{"raw_affiliation_string":"Link\u00f6ping University, Link\u00f6ping, Sweden","institution_ids":["https://openalex.org/I102134673"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045354671","display_name":"Bo Wahlberg","orcid":"https://orcid.org/0000-0002-1927-1690"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Bo Wahlberg","raw_affiliation_strings":["School of Electrical Engineering, KTH Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5075202398"],"corresponding_institution_ids":["https://openalex.org/I95674353"],"apc_list":null,"apc_paid":null,"fwci":1.3597,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.8467854,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/broyden\u2013fletcher\u2013goldfarb\u2013shanno-algorithm","display_name":"Broyden\u2013Fletcher\u2013Goldfarb\u2013Shanno algorithm","score":0.8837814331054688},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7231767773628235},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5580668449401855},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5481524467468262},{"id":"https://openalex.org/keywords/mnist-database","display_name":"MNIST database","score":0.5217583775520325},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.45297354459762573},{"id":"https://openalex.org/keywords/backpropagation","display_name":"Backpropagation","score":0.42435935139656067},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4151456654071808},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4114452302455902},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3043646812438965},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2207127809524536}],"concepts":[{"id":"https://openalex.org/C132721684","wikidata":"https://www.wikidata.org/wiki/Q2877013","display_name":"Broyden\u2013Fletcher\u2013Goldfarb\u2013Shanno algorithm","level":3,"score":0.8837814331054688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7231767773628235},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5580668449401855},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5481524467468262},{"id":"https://openalex.org/C190502265","wikidata":"https://www.wikidata.org/wiki/Q17069496","display_name":"MNIST database","level":3,"score":0.5217583775520325},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.45297354459762573},{"id":"https://openalex.org/C155032097","wikidata":"https://www.wikidata.org/wiki/Q798503","display_name":"Backpropagation","level":3,"score":0.42435935139656067},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4151456654071808},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4114452302455902},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3043646812438965},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2207127809524536},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn52387.2021.9534194","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534194","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322327","display_name":"Knut och Alice Wallenbergs Stiftelse","ror":"https://ror.org/004hzzk67"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W99485931","https://openalex.org/W1798702550","https://openalex.org/W1942704184","https://openalex.org/W2051434435","https://openalex.org/W2097998348","https://openalex.org/W2106411961","https://openalex.org/W2112796928","https://openalex.org/W2137825550","https://openalex.org/W2166107799","https://openalex.org/W2409744450","https://openalex.org/W2465345967","https://openalex.org/W2740631809","https://openalex.org/W2786910476","https://openalex.org/W2798520605","https://openalex.org/W2916158460","https://openalex.org/W2950277768","https://openalex.org/W2952086041","https://openalex.org/W2963775850","https://openalex.org/W2963807877","https://openalex.org/W2964058036","https://openalex.org/W2970791107","https://openalex.org/W2970971581","https://openalex.org/W2971490762","https://openalex.org/W2974578743","https://openalex.org/W2981603589","https://openalex.org/W3118608800","https://openalex.org/W4295312788","https://openalex.org/W4300448178","https://openalex.org/W6600284362","https://openalex.org/W6638209102","https://openalex.org/W6640590530","https://openalex.org/W6674385629","https://openalex.org/W6676179485","https://openalex.org/W6714477741","https://openalex.org/W6717367658","https://openalex.org/W6719646854","https://openalex.org/W6742208537","https://openalex.org/W6747827020","https://openalex.org/W6751191612","https://openalex.org/W6759546789","https://openalex.org/W6766978945","https://openalex.org/W6767514912","https://openalex.org/W6767977373","https://openalex.org/W6768441595"],"related_works":["https://openalex.org/W4386603768","https://openalex.org/W2950475743","https://openalex.org/W3008580913","https://openalex.org/W3202411070","https://openalex.org/W1701967867","https://openalex.org/W2894173309","https://openalex.org/W4387932263","https://openalex.org/W2098962763","https://openalex.org/W2371065793","https://openalex.org/W2157746493"],"abstract_inverted_index":{"We":[0,75],"consider":[1],"the":[2,10,50,66,85,89,106,112,132,140,146,152,165],"problem":[3,130],"to":[4,41,71,204],"learn":[5],"a":[6,17,77,117,127,178,185,195,202,212],"step-size":[7],"policy":[8,93,197],"for":[9,25,37,49,54,151,155,159,208],"Limited-Memory":[11],"Broyden-Fletcher-Goldfarb-Shanno":[12],"(L-BFGS)":[13],"algorithm.":[14,136],"This":[15],"is":[16,31,94,124,143],"limited":[18],"computational":[19],"memory":[20],"quasi-Newton":[21],"method":[22,142],"widely":[23],"used":[24,200],"deterministic":[26],"unconstrained":[27],"optimization.":[28],"However,":[29],"L-BFGS":[30,55,176,183],"currently":[32],"avoided":[33],"in":[34,221],"large-scale":[35,223],"problems":[36,210],"requiring":[38],"step":[39,51,114,187],"sizes":[40],"be":[42,199],"provided":[43],"at":[44],"each":[45],"iteration.":[46],"Current":[47],"methodologies":[48],"size":[52],"selection":[53],"use":[56,220],"heuristic":[57],"tuning":[58],"of":[59,65,84,98,105,139,148],"design":[60],"parameters":[61],"and":[62,69,109,158,182],"massive":[63],"re-evaluations":[64],"objective":[67,107],"function":[68],"gradient":[70],"find":[72],"appropriate":[73],"step-lengths.":[74],"propose":[76],"neural":[78],"network":[79],"architecture":[80],"with":[81,177,184],"local":[82],"information":[83],"current":[86],"iterate":[87],"as":[88,126,173,201],"input.":[90],"The":[91,120,137,161,189],"step-length":[92],"learned":[95,196],"from":[96],"data":[97],"similar":[99],"optimization":[100,129,224],"problems,":[101],"avoids":[102],"additional":[103,214],"evaluations":[104],"function,":[108],"guarantees":[110],"that":[111,164,194],"output":[113],"remains":[115],"inside":[116],"pre-defined":[118],"interval.":[119],"corresponding":[121],"training":[122,147,215],"procedure":[123],"formulated":[125],"stochastic":[128],"using":[131],"backpropagation":[133],"through":[134],"time":[135],"performance":[138],"proposed":[141,166],"evaluated":[144],"on":[145],"image":[149],"classifiers":[150],"MNIST":[153],"database":[154],"handwritten":[156],"digits":[157],"CIFAR-10.":[160],"results":[162,191],"show":[163,193],"algorithm":[167],"outperforms":[168],"heuristically":[169],"tuned":[170],"optimizers":[171],"such":[172],"ADAM,":[174],"RMSprop,":[175],"backtracking":[179],"line":[180],"search,":[181],"constant":[186],"size.":[188],"numerical":[190],"also":[192],"can":[198],"warm-start":[203],"train":[205],"new":[206],"policies":[207],"different":[209],"after":[211],"few":[213],"steps,":[216],"highlighting":[217],"its":[218],"potential":[219],"multiple":[222],"problems.":[225]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
