{"id":"https://openalex.org/W4306793909","doi":"https://doi.org/10.1109/tit.2022.3215088","title":"On Generalization Bounds for Deep Networks Based on Loss Surface Implicit Regularization","display_name":"On Generalization Bounds for Deep Networks Based on Loss Surface Implicit Regularization","publication_year":2022,"publication_date":"2022-10-14","ids":{"openalex":"https://openalex.org/W4306793909","doi":"https://doi.org/10.1109/tit.2022.3215088"},"language":"en","primary_location":{"id":"doi:10.1109/tit.2022.3215088","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tit.2022.3215088","pdf_url":"https://ieeexplore.ieee.org/ielx7/18/10024105/09919858.pdf","source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ieeexplore.ieee.org/ielx7/18/10024105/09919858.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066308746","display_name":"Masaaki Imaizumi","orcid":"https://orcid.org/0000-0001-6186-613X"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Masaaki Imaizumi","raw_affiliation_strings":["Komaba Institute for Science, The University of Tokyo, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0001-6186-613X","affiliations":[{"raw_affiliation_string":"Komaba Institute for Science, The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002981992","display_name":"Johannes Schmidt-Hieber","orcid":"https://orcid.org/0000-0003-2699-4990"},"institutions":[{"id":"https://openalex.org/I94624287","display_name":"University of Twente","ror":"https://ror.org/006hf6230","country_code":"NL","type":"education","lineage":["https://openalex.org/I94624287"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Johannes Schmidt-Hieber","raw_affiliation_strings":["Department of Applied Mathematics, University of Twente, Enschede, NB, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0003-2699-4990","affiliations":[{"raw_affiliation_string":"Department of Applied Mathematics, University of Twente, Enschede, NB, The Netherlands","institution_ids":["https://openalex.org/I94624287"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5066308746"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.2775,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.63572299,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"69","issue":"2","first_page":"1203","last_page":"1223"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12056","display_name":"Markov Chains and Monte Carlo Methods","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.7723817825317383},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5868077278137207},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.579039454460144},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.5306742787361145},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.5143193006515503},{"id":"https://openalex.org/keywords/convexity","display_name":"Convexity","score":0.5142366290092468},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.5099767446517944},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4822264611721039},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.4422139823436737},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3789981007575989},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3786134719848633},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2889241874217987},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.17697033286094666}],"concepts":[{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.7723817825317383},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5868077278137207},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.579039454460144},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.5306742787361145},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.5143193006515503},{"id":"https://openalex.org/C72134830","wikidata":"https://www.wikidata.org/wiki/Q5166524","display_name":"Convexity","level":2,"score":0.5142366290092468},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5099767446517944},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4822264611721039},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.4422139823436737},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3789981007575989},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3786134719848633},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2889241874217987},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.17697033286094666},{"id":"https://openalex.org/C106159729","wikidata":"https://www.wikidata.org/wiki/Q2294553","display_name":"Financial economics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tit.2022.3215088","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tit.2022.3215088","pdf_url":"https://ieeexplore.ieee.org/ielx7/18/10024105/09919858.pdf","source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"},{"id":"pmh:oai:ris.utwente.nl:openaire/b3fdb8de-83be-4269-b9fa-50472008efaa","is_oa":true,"landing_page_url":"https://research.utwente.nl/en/publications/b3fdb8de-83be-4269-b9fa-50472008efaa","pdf_url":"https://ris.utwente.nl/ws/files/309818389/On_Generalization_Bounds_for_Deep_Networks_Based_on_Loss_Surface_Implicit_Regularization_77_.pdf","source":{"id":"https://openalex.org/S4406922991","display_name":"University of Twente Research Information","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Imaizumi, M & Schmidt-Hieber, A J 2023, 'On Generalization Bounds for Deep Networks Based on Loss Surface Implicit Regularization', IEEE transactions on information theory, vol. 69, no. 2, pp. 1203- 1223. https://doi.org/10.1109/TIT.2022.3215088","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1109/tit.2022.3215088","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tit.2022.3215088","pdf_url":"https://ieeexplore.ieee.org/ielx7/18/10024105/09919858.pdf","source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"No poverty","id":"https://metadata.un.org/sdg/1"}],"awards":[{"id":"https://openalex.org/G5773834618","display_name":null,"funder_award_id":"JPMJPR1852","funder_id":"https://openalex.org/F4320334789","funder_display_name":"Japan Science and Technology Agency"},{"id":"https://openalex.org/G6264024165","display_name":null,"funder_award_id":"VI.Vidi.192.021","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G8513268496","display_name":null,"funder_award_id":"18K18114","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320334789","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4306793909.pdf","grobid_xml":"https://content.openalex.org/works/W4306793909.grobid-xml"},"referenced_works_count":139,"referenced_works":["https://openalex.org/W1720114023","https://openalex.org/W1811750039","https://openalex.org/W1899249567","https://openalex.org/W2010353172","https://openalex.org/W2052044664","https://openalex.org/W2083731191","https://openalex.org/W2100007248","https://openalex.org/W2194775991","https://openalex.org/W2258658829","https://openalex.org/W2259695933","https://openalex.org/W2344998188","https://openalex.org/W2625063094","https://openalex.org/W2732724430","https://openalex.org/W2736970300","https://openalex.org/W2749772809","https://openalex.org/W2768267830","https://openalex.org/W2777662428","https://openalex.org/W2790253170","https://openalex.org/W2792285703","https://openalex.org/W2809090039","https://openalex.org/W2886836477","https://openalex.org/W2895992958","https://openalex.org/W2896457183","https://openalex.org/W2898859254","https://openalex.org/W2899748887","https://openalex.org/W2908206262","https://openalex.org/W2910589063","https://openalex.org/W2912099989","https://openalex.org/W2914852400","https://openalex.org/W2919115771","https://openalex.org/W2946840143","https://openalex.org/W2962702650","https://openalex.org/W2963038205","https://openalex.org/W2963094815","https://openalex.org/W2963122491","https://openalex.org/W2963173418","https://openalex.org/W2963236897","https://openalex.org/W2963248893","https://openalex.org/W2963264710","https://openalex.org/W2963317585","https://openalex.org/W2963446085","https://openalex.org/W2963477238","https://openalex.org/W2963518130","https://openalex.org/W2963664410","https://openalex.org/W2963798163","https://openalex.org/W2963826371","https://openalex.org/W2963862692","https://openalex.org/W2964031251","https://openalex.org/W2964047251","https://openalex.org/W2970490659","https://openalex.org/W2976958311","https://openalex.org/W2981382427","https://openalex.org/W2996067004","https://openalex.org/W2999913149","https://openalex.org/W3018252856","https://openalex.org/W3027810930","https://openalex.org/W3035184894","https://openalex.org/W3037338360","https://openalex.org/W3049059782","https://openalex.org/W3090091013","https://openalex.org/W3100231902","https://openalex.org/W3102511045","https://openalex.org/W3104969455","https://openalex.org/W3162220519","https://openalex.org/W3172995164","https://openalex.org/W3191067499","https://openalex.org/W3203457432","https://openalex.org/W3213800969","https://openalex.org/W4211030719","https://openalex.org/W4226038297","https://openalex.org/W4241153335","https://openalex.org/W4244701402","https://openalex.org/W4249716558","https://openalex.org/W4287343372","https://openalex.org/W4287605751","https://openalex.org/W4287865060","https://openalex.org/W4288028699","https://openalex.org/W4289293816","https://openalex.org/W4292779060","https://openalex.org/W4293717613","https://openalex.org/W4294175339","https://openalex.org/W4295116634","https://openalex.org/W4299785371","https://openalex.org/W4300426530","https://openalex.org/W4367295640","https://openalex.org/W6637414576","https://openalex.org/W6638214083","https://openalex.org/W6639736602","https://openalex.org/W6690388216","https://openalex.org/W6692563690","https://openalex.org/W6692572551","https://openalex.org/W6713348437","https://openalex.org/W6734079340","https://openalex.org/W6735717691","https://openalex.org/W6738074204","https://openalex.org/W6739166439","https://openalex.org/W6739659843","https://openalex.org/W6740483536","https://openalex.org/W6741472820","https://openalex.org/W6745276634","https://openalex.org/W6747381837","https://openalex.org/W6748059092","https://openalex.org/W6748155593","https://openalex.org/W6748278370","https://openalex.org/W6748304690","https://openalex.org/W6748554400","https://openalex.org/W6748600614","https://openalex.org/W6748636771","https://openalex.org/W6748742374","https://openalex.org/W6749015878","https://openalex.org/W6749107692","https://openalex.org/W6752495264","https://openalex.org/W6752591435","https://openalex.org/W6755207826","https://openalex.org/W6755463368","https://openalex.org/W6756091659","https://openalex.org/W6756120399","https://openalex.org/W6756455746","https://openalex.org/W6757628700","https://openalex.org/W6758153731","https://openalex.org/W6759238893","https://openalex.org/W6763368363","https://openalex.org/W6767185500","https://openalex.org/W6767717172","https://openalex.org/W6768437808","https://openalex.org/W6769009116","https://openalex.org/W6771471661","https://openalex.org/W6772391699","https://openalex.org/W6773674915","https://openalex.org/W6778032888","https://openalex.org/W6778883912","https://openalex.org/W6779548206","https://openalex.org/W6779977647","https://openalex.org/W6784388680","https://openalex.org/W6791199311","https://openalex.org/W6795027320","https://openalex.org/W6804346497","https://openalex.org/W6840464181","https://openalex.org/W7046297837"],"related_works":["https://openalex.org/W2029932722","https://openalex.org/W3094963542","https://openalex.org/W4287625305","https://openalex.org/W2752159661","https://openalex.org/W2092244978","https://openalex.org/W2948488743","https://openalex.org/W4287714231","https://openalex.org/W3042560000","https://openalex.org/W2963334011","https://openalex.org/W3111449556"],"abstract_inverted_index":{"The":[0],"classical":[1],"statistical":[2,76],"learning":[3],"theory":[4],"implies":[5],"that":[6,86,104],"fitting":[7],"too":[8],"many":[9],"parameters":[10,29],"leads":[11],"to":[12,95,98,163],"overfitting":[13],"and":[14,33,103,112,145,217],"poor":[15],"performance.":[16],"That":[17],"modern":[18],"deep":[19,44,122,183],"neural":[20,123,131,184],"networks":[21,185],"generalize":[22],"well":[23],"despite":[24],"a":[25,35,99,136,147,176],"large":[26],"number":[27,197],"of":[28,43,67,78,109,138,152,182,190,198,210,221,230],"contradicts":[30],"this":[31,105],"finding":[32],"constitutes":[34],"major":[36],"unsolved":[37],"problem":[38],"towards":[39],"explaining":[40],"the":[41,51,64,68,75,90,118,142,153,179,187,191,196,208,214,222,228],"success":[42],"learning.":[45],"While":[46],"previous":[47],"work":[48],"focuses":[49],"on":[50,117,178,206,227],"implicit":[52,110],"regularization":[53,111],"induced":[54],"by":[55],"stochastic":[56],"gradient":[57,82],"descent":[58],"(SGD),":[59],"we":[60,133,174],"study":[61],"here":[62],"how":[63],"local":[65,72,91,143,148,218,234],"geometry":[66,92],"energy":[69],"landscape":[70],"around":[71,141,233],"minima":[73,144],"affects":[74],"properties":[77],"SGD":[79,94,162,215],"with":[80],"Gaussian":[81],"noise.":[83],"We":[84],"argue":[85],"under":[87],"reasonable":[88],"assumptions,":[89],"forces":[93],"stay":[96],"close":[97],"low":[100],"dimensional":[101],"subspace":[102],"induces":[106],"another":[107],"form":[108],"results":[113],"in":[114,165,213],"tighter":[115],"bounds":[116,129,160],"generalization":[119,127,180],"error":[120,128,181],"for":[121,130,161],"networks.":[124],"To":[125],"derive":[126,175],"networks,":[132],"first":[134],"introduce":[135],"notion":[137],"stagnation":[139,167,172],"sets":[140,168],"impose":[146],"essential":[149],"convexity":[150],"property":[151],"population":[154],"risk.":[155],"Under":[156],"these":[157,166],"conditions,":[158],"lower":[159],"remain":[164],"are":[169,204],"derived.":[170],"If":[171],"occurs,":[173],"bound":[177],"involving":[186],"spectral":[188],"norms":[189],"weight":[192],"matrices":[193],"but":[194],"not":[195],"network":[199],"parameters.":[200],"Technically,":[201],"our":[202],"proofs":[203],"based":[205,226],"controlling":[207],"change":[209],"parameter":[211],"values":[212],"iterates":[216],"uniform":[219],"convergence":[220],"empirical":[223],"loss":[224],"functions":[225],"entropy":[229],"suitable":[231],"neighborhoods":[232],"minima.":[235]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
