{"id":"https://openalex.org/W2895642264","doi":"https://doi.org/10.1007/978-3-030-01424-7_39","title":"Width of Minima Reached by Stochastic Gradient Descent is Influenced by Learning Rate to Batch Size Ratio","display_name":"Width of Minima Reached by Stochastic Gradient Descent is Influenced by Learning Rate to Batch Size Ratio","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2895642264","doi":"https://doi.org/10.1007/978-3-030-01424-7_39","mag":"2895642264"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-030-01424-7_39","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-030-01424-7_39","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/1b1d210a-efed-44b7-8907-c1506f70a64d","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024552077","display_name":"Stanis\u0142aw Jastrz\u0229bski","orcid":"https://orcid.org/0000-0003-4138-1818"},"institutions":[{"id":"https://openalex.org/I126596746","display_name":"Jagiellonian University","ror":"https://ror.org/03bqmcz70","country_code":"PL","type":"education","lineage":["https://openalex.org/I126596746"]},{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA","PL"],"is_corresponding":true,"raw_author_name":"Stanislaw Jastrz\u0119bski","raw_affiliation_strings":["Facebook AI Research, Paris, France","Jagiellonian University, Krak\u00f3w, Poland","MILA, Universit\u00e9 de Montr\u00e9al, Montreal, Canada"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research, Paris, France","institution_ids":[]},{"raw_affiliation_string":"Jagiellonian University, Krak\u00f3w, Poland","institution_ids":["https://openalex.org/I126596746"]},{"raw_affiliation_string":"MILA, Universit\u00e9 de Montr\u00e9al, Montreal, Canada","institution_ids":["https://openalex.org/I70931966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069438696","display_name":"Zachary Kenton","orcid":null},"institutions":[{"id":"https://openalex.org/I126596746","display_name":"Jagiellonian University","ror":"https://ror.org/03bqmcz70","country_code":"PL","type":"education","lineage":["https://openalex.org/I126596746"]},{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA","PL"],"is_corresponding":false,"raw_author_name":"Zachary Kenton","raw_affiliation_strings":["Jagiellonian University, Krak\u00f3w, Poland","MILA, Universit\u00e9 de Montr\u00e9al, Montreal, Canada"],"affiliations":[{"raw_affiliation_string":"Jagiellonian University, Krak\u00f3w, Poland","institution_ids":["https://openalex.org/I126596746"]},{"raw_affiliation_string":"MILA, Universit\u00e9 de Montr\u00e9al, Montreal, Canada","institution_ids":["https://openalex.org/I70931966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000111662","display_name":"Devansh Arpit","orcid":null},"institutions":[{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Devansh Arpit","raw_affiliation_strings":["MILA, Universit\u00e9 de Montr\u00e9al, Montreal, Canada"],"affiliations":[{"raw_affiliation_string":"MILA, Universit\u00e9 de Montr\u00e9al, Montreal, Canada","institution_ids":["https://openalex.org/I70931966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057065873","display_name":"Nicolas Ballas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nicolas Ballas","raw_affiliation_strings":["Facebook AI Research, Paris, France"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research, Paris, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026151059","display_name":"Asja Fischer","orcid":"https://orcid.org/0000-0002-1916-7033"},"institutions":[{"id":"https://openalex.org/I904495901","display_name":"Ruhr University Bochum","ror":"https://ror.org/04tsk2644","country_code":"DE","type":"education","lineage":["https://openalex.org/I904495901"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Asja Fischer","raw_affiliation_strings":["Faculty of Mathematics, Ruhr-University Bochum, Bochum, Germany"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics, Ruhr-University Bochum, Bochum, Germany","institution_ids":["https://openalex.org/I904495901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086198262","display_name":"Yoshua Bengio","orcid":"https://orcid.org/0000-0002-9322-3515"},"institutions":[{"id":"https://openalex.org/I109736498","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95","country_code":"CA","type":"facility","lineage":["https://openalex.org/I109736498"]},{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Yoshua Bengio","raw_affiliation_strings":["CIFAR Senior Fellow, Toronto, Canada","MILA, Universit\u00e9 de Montr\u00e9al, Montreal, Canada"],"affiliations":[{"raw_affiliation_string":"CIFAR Senior Fellow, Toronto, Canada","institution_ids":["https://openalex.org/I109736498"]},{"raw_affiliation_string":"MILA, Universit\u00e9 de Montr\u00e9al, Montreal, Canada","institution_ids":["https://openalex.org/I70931966"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007901825","display_name":"Amos Storkey","orcid":"https://orcid.org/0000-0002-8100-506X"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Amos Storkey","raw_affiliation_strings":["School of Informatics, University of Edinburgh, Edinburgh, Scotland"],"affiliations":[{"raw_affiliation_string":"School of Informatics, University of Edinburgh, Edinburgh, Scotland","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5024552077"],"corresponding_institution_ids":["https://openalex.org/I126596746","https://openalex.org/I70931966"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":null,"fwci":2.7188,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.92973965,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"392","last_page":"402"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.9182946681976318},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.8093979954719543},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7651249170303345},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.683286190032959},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.638809323310852},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5779041647911072},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.5121512413024902},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5090369582176208},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.48229363560676575},{"id":"https://openalex.org/keywords/generalization-error","display_name":"Generalization error","score":0.46938344836235046},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.46913760900497437},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4284172058105469},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4236929714679718},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.41994738578796387},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.3221508264541626},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22576865553855896},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.12973245978355408},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.10776534676551819}],"concepts":[{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.9182946681976318},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.8093979954719543},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7651249170303345},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.683286190032959},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.638809323310852},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5779041647911072},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.5121512413024902},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5090369582176208},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.48229363560676575},{"id":"https://openalex.org/C117765406","wikidata":"https://www.wikidata.org/wiki/Q5362437","display_name":"Generalization error","level":3,"score":0.46938344836235046},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.46913760900497437},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4284172058105469},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4236929714679718},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.41994738578796387},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3221508264541626},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22576865553855896},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.12973245978355408},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.10776534676551819},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/978-3-030-01424-7_39","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-030-01424-7_39","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},{"id":"pmh:oai:pure.ed.ac.uk:publications/1b1d210a-efed-44b7-8907-c1506f70a64d","is_oa":true,"landing_page_url":"https://link.springer.com/chapter/10.1007/978-3-030-01424-7_39","pdf_url":"https://www.research.ed.ac.uk/en/publications/1b1d210a-efed-44b7-8907-c1506f70a64d","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:pure.ed.ac.uk:openaire/1b1d210a-efed-44b7-8907-c1506f70a64d","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/1b1d210a-efed-44b7-8907-c1506f70a64d","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Jastrz\u0119bski, S, Kenton, Z, Arpit, D, Ballas, N, Fischer, A, Bengio, Y & Storkey, A 2018, Width of Minima Reached by Stochastic Gradient Descent is Influenced by Learning Rate to Batch Size Ratio. in Proceedings of 27th International Conference on Artificial Neural Networks. Lecture Notes in Computer Science, vol. 11141, Theoretical Computer Science and General Issues, vol. 11141, Rhodes, Greece, pp. 392-402, 27th International Conference on Artificial Neural Networks , Rhodes, Greece, 4/10/18. https://doi.org/10.1007/978-3-030-01424-7_39","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:publications/1b1d210a-efed-44b7-8907-c1506f70a64d","is_oa":true,"landing_page_url":"https://link.springer.com/chapter/10.1007/978-3-030-01424-7_39","pdf_url":"https://www.research.ed.ac.uk/en/publications/1b1d210a-efed-44b7-8907-c1506f70a64d","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2895642264.pdf"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W1575104486","https://openalex.org/W1686810756","https://openalex.org/W2167433878","https://openalex.org/W2605488176","https://openalex.org/W2617242334","https://openalex.org/W2618076303","https://openalex.org/W2622263826","https://openalex.org/W2626325961","https://openalex.org/W2682189153","https://openalex.org/W2750384547","https://openalex.org/W2782476368","https://openalex.org/W2790626470","https://openalex.org/W2912811302","https://openalex.org/W2963177640","https://openalex.org/W2963959597","https://openalex.org/W2964072432","https://openalex.org/W4231021340","https://openalex.org/W6600503824","https://openalex.org/W6600737549","https://openalex.org/W6740005241"],"related_works":["https://openalex.org/W2115886818","https://openalex.org/W3118431319","https://openalex.org/W3004700632","https://openalex.org/W2466106906","https://openalex.org/W3177888558","https://openalex.org/W3027810930","https://openalex.org/W2069187387","https://openalex.org/W4206903459","https://openalex.org/W2754816816","https://openalex.org/W4366280654"],"abstract_inverted_index":null,"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2026-03-10T14:07:55.174380","created_date":"2025-10-10T00:00:00"}
