{"id":"https://openalex.org/W4289655249","doi":"https://doi.org/10.1109/isit50566.2022.9834569","title":"Regularization-wise double descent: Why it occurs and how to eliminate it","display_name":"Regularization-wise double descent: Why it occurs and how to eliminate it","publication_year":2022,"publication_date":"2022-06-26","ids":{"openalex":"https://openalex.org/W4289655249","doi":"https://doi.org/10.1109/isit50566.2022.9834569"},"language":"en","primary_location":{"id":"doi:10.1109/isit50566.2022.9834569","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit50566.2022.9834569","pdf_url":null,"source":{"id":"https://openalex.org/S4363604560","display_name":"2022 IEEE International Symposium on Information Theory (ISIT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012797060","display_name":"Fatih Y\u0131lmaz","orcid":"https://orcid.org/0000-0001-7873-1979"},"institutions":[{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Fatih Furkan Yilmaz","raw_affiliation_strings":["Rice University,Dept. of Electrical and Computer Engineering","Dept. of Electrical and Computer Engineering, Rice University"],"affiliations":[{"raw_affiliation_string":"Rice University,Dept. of Electrical and Computer Engineering","institution_ids":["https://openalex.org/I74775410"]},{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Rice University","institution_ids":["https://openalex.org/I74775410"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003606899","display_name":"Reinhard Heckel","orcid":"https://orcid.org/0000-0002-2874-2984"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]},{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Reinhard Heckel","raw_affiliation_strings":["Rice University,Dept. of Electrical and Computer Engineering","Dept. of Electrical and Computer Engineering, Technical University of Munich","Dept. of Electrical and Computer Engineering, Rice University"],"affiliations":[{"raw_affiliation_string":"Rice University,Dept. of Electrical and Computer Engineering","institution_ids":["https://openalex.org/I74775410"]},{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Technical University of Munich","institution_ids":["https://openalex.org/I62916508"]},{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Rice University","institution_ids":["https://openalex.org/I74775410"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5012797060"],"corresponding_institution_ids":["https://openalex.org/I74775410"],"apc_list":null,"apc_paid":null,"fwci":0.3118,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.49687567,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"426","last_page":"431"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.7896886467933655},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.6534204483032227},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.639724612236023},{"id":"https://openalex.org/keywords/superposition-principle","display_name":"Superposition principle","score":0.541329026222229},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5294199585914612},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5291441082954407},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.41289445757865906},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3984738886356354},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.39722734689712524},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3278149962425232},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.1637938916683197}],"concepts":[{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.7896886467933655},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.6534204483032227},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.639724612236023},{"id":"https://openalex.org/C27753989","wikidata":"https://www.wikidata.org/wiki/Q284885","display_name":"Superposition principle","level":2,"score":0.541329026222229},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5294199585914612},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5291441082954407},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.41289445757865906},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3984738886356354},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.39722734689712524},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3278149962425232},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.1637938916683197}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isit50566.2022.9834569","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit50566.2022.9834569","pdf_url":null,"source":{"id":"https://openalex.org/S4363604560","display_name":"2022 IEEE International Symposium on Information Theory (ISIT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W2809090039","https://openalex.org/W2886836477","https://openalex.org/W2922153390","https://openalex.org/W2948069823","https://openalex.org/W2963096987","https://openalex.org/W2963518130","https://openalex.org/W2967536008","https://openalex.org/W2970176397","https://openalex.org/W2984169168","https://openalex.org/W3006943693","https://openalex.org/W3007766676","https://openalex.org/W3008906732","https://openalex.org/W3009460743","https://openalex.org/W3034510050","https://openalex.org/W3034704745","https://openalex.org/W3035751893","https://openalex.org/W3046705928","https://openalex.org/W3049504369","https://openalex.org/W3099407272","https://openalex.org/W3104969455","https://openalex.org/W3130714358","https://openalex.org/W3133309875","https://openalex.org/W3137695714","https://openalex.org/W4206410067","https://openalex.org/W4288413101","https://openalex.org/W4378767342","https://openalex.org/W6740005241","https://openalex.org/W6752495264","https://openalex.org/W6755053738","https://openalex.org/W6755424776","https://openalex.org/W6760498417","https://openalex.org/W6762989574","https://openalex.org/W6763485134","https://openalex.org/W6763519550","https://openalex.org/W6767329639","https://openalex.org/W6771252938","https://openalex.org/W6773481208","https://openalex.org/W6774250809","https://openalex.org/W6774364462","https://openalex.org/W6774375107","https://openalex.org/W6774542976","https://openalex.org/W6779476633","https://openalex.org/W6779712411","https://openalex.org/W6781106788","https://openalex.org/W6785098454","https://openalex.org/W6790928385"],"related_works":["https://openalex.org/W1998698147","https://openalex.org/W4206903459","https://openalex.org/W2754816816","https://openalex.org/W4366280654","https://openalex.org/W3160167280","https://openalex.org/W4231621013","https://openalex.org/W4362706668","https://openalex.org/W3008318776","https://openalex.org/W1977633006","https://openalex.org/W2041416246"],"abstract_inverted_index":{"The":[0],"risk":[1,27,59,91],"of":[2,17,31,49,60,72,97,104,116,179],"overparameterized":[3],"models,":[4],"in":[5,77],"particular":[6],"deep":[7],"neural":[8,128],"networks,":[9],"is":[10,92],"often":[11],"double-descent":[12,38],"shaped":[13,90],"as":[14,28,46,69,176],"a":[15,29,47,70,87,95,126,152,177],"function":[16,30,71,178],"the":[18,26,32,58,73,105,113,140,144,180],"model":[19,106],"size.":[20],"Recently,":[21],"it":[22],"was":[23],"shown":[24],"that":[25,57,83,132,170],"early-stopping":[33],"time":[34],"can":[35,43,64,108,135],"also":[36],"be":[37,44,109,136],"shaped,":[39],"and":[40,79,107,130,146,155,163,168],"this":[41,53,122],"behavior":[42,68,175],"explained":[45],"super-position":[48],"bias-variance":[50,98],"tradeoffs.":[51],"In":[52],"paper,":[54],"we":[55,124,150],"show":[56,131],"explicit":[61],"L2-regularized":[62],"models":[63],"exhibit":[65,172],"double":[66,88,133,173],"descent":[67,89,134,174],"regularization":[74,114,141,181],"strength,":[75],"both":[76],"theory":[78],"practice.":[80],"We":[81],"find":[82],"for":[84,143],"linear":[85],"regression,":[86],"caused":[93],"by":[94,111,121,138],"superposition":[96],"tradeoffs":[99],"corresponding":[100],"to":[101],"different":[102],"parts":[103],"mitigated":[110],"scaling":[112],"strength":[115],"each":[117],"part":[118],"appropriately.":[119],"Motivated":[120],"result,":[123],"study":[125,151],"two-layer":[127],"network":[129],"eliminated":[137],"adjusting":[139],"strengths":[142],"first":[145],"second":[147],"layer.":[148],"Lastly,":[149],"5-layer":[153],"CNN":[154],"ResNet-18":[156],"trained":[157],"on":[158],"CIFAR-10":[159],"with":[160],"label":[161,166],"noise,":[162,167],"CIFAR-100":[164],"without":[165],"demonstrate":[169],"all":[171],"strength.":[182]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
