{"id":"https://openalex.org/W3107776541","doi":"https://doi.org/10.1137/20m1387821","title":"Dimensionality Reduction, Regularization, and Generalization in Overparameterized Regressions","display_name":"Dimensionality Reduction, Regularization, and Generalization in Overparameterized Regressions","publication_year":2022,"publication_date":"2022-02-10","ids":{"openalex":"https://openalex.org/W3107776541","doi":"https://doi.org/10.1137/20m1387821","mag":"3107776541"},"language":"en","primary_location":{"id":"doi:10.1137/20m1387821","is_oa":true,"landing_page_url":"https://doi.org/10.1137/20m1387821","pdf_url":null,"source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1137/20m1387821","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109480414","display_name":"Ningyuan Teresa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ningyuan Teresa","raw_affiliation_strings":["Department of Applied Mathematics and Statistics, Johns Hopkins University, and Mathematical Institute for Data Science, Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Department of Applied Mathematics and Statistics, Johns Hopkins University, and Mathematical Institute for Data Science, Johns Hopkins University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025226631","display_name":"David W. Hogg","orcid":"https://orcid.org/0000-0003-2866-9403"},"institutions":[{"id":"https://openalex.org/I4387153999","display_name":"Flatiron Institute","ror":"https://ror.org/00sekdz59","country_code":null,"type":"nonprofit","lineage":["https://openalex.org/I4210107338","https://openalex.org/I4387153999"]},{"id":"https://openalex.org/I4210153546","display_name":"Flatiron Health (United States)","ror":"https://ror.org/0508h6p74","country_code":"US","type":"company","lineage":["https://openalex.org/I4210153546"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David W. Hogg","raw_affiliation_strings":["Flatiron Institute,"],"affiliations":[{"raw_affiliation_string":"Flatiron Institute,","institution_ids":["https://openalex.org/I4210153546","https://openalex.org/I4387153999"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035079908","display_name":"Soledad Villar","orcid":"https://orcid.org/0000-0003-4968-3829"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Soledad Villar","raw_affiliation_strings":["Department of Applied Mathematics and Statistics, Johns Hopkins University, and Mathematical Institute for Data Science, Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Department of Applied Mathematics and Statistics, Johns Hopkins University, and Mathematical Institute for Data Science, Johns Hopkins University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5035079908"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7279,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.67391597,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"4","issue":"1","first_page":"126","last_page":"152"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11324","display_name":"Spectroscopy Techniques in Biomedical and Chemical Research","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/principal-component-analysis","display_name":"Principal component analysis","score":0.7883741855621338},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.6695965528488159},{"id":"https://openalex.org/keywords/partial-least-squares-regression","display_name":"Partial least squares regression","score":0.6182997822761536},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.585529625415802},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5531309843063354},{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.46956896781921387},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4582381248474121},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4254075586795807},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.42037510871887207},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.40360337495803833},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3999221920967102},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32612890005111694}],"concepts":[{"id":"https://openalex.org/C27438332","wikidata":"https://www.wikidata.org/wiki/Q2873","display_name":"Principal component analysis","level":2,"score":0.7883741855621338},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.6695965528488159},{"id":"https://openalex.org/C22354355","wikidata":"https://www.wikidata.org/wiki/Q422009","display_name":"Partial least squares regression","level":2,"score":0.6182997822761536},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.585529625415802},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5531309843063354},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.46956896781921387},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4582381248474121},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4254075586795807},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.42037510871887207},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.40360337495803833},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3999221920967102},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32612890005111694},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1137/20m1387821","is_oa":true,"landing_page_url":"https://doi.org/10.1137/20m1387821","pdf_url":null,"source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2011.11477","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.11477","pdf_url":"https://arxiv.org/pdf/2011.11477","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1137/20m1387821","is_oa":true,"landing_page_url":"https://doi.org/10.1137/20m1387821","pdf_url":null,"source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2690271170","display_name":null,"funder_award_id":"FA9550-18-1-7007","funder_id":"https://openalex.org/F4320338503","funder_display_name":"European Office of Aerospace Research and Development"},{"id":"https://openalex.org/G6603199472","display_name":null,"funder_award_id":"DMS 2044349","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338503","display_name":"European Office of Aerospace Research and Development","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":85,"referenced_works":["https://openalex.org/W7299809","https://openalex.org/W54422097","https://openalex.org/W1499012572","https://openalex.org/W1501882007","https://openalex.org/W1502922572","https://openalex.org/W1663830336","https://openalex.org/W1971248047","https://openalex.org/W1973117123","https://openalex.org/W1995168330","https://openalex.org/W2023832712","https://openalex.org/W2058505671","https://openalex.org/W2079775628","https://openalex.org/W2095687430","https://openalex.org/W2104969279","https://openalex.org/W2112507308","https://openalex.org/W2126999940","https://openalex.org/W2132549764","https://openalex.org/W2137225583","https://openalex.org/W2163529608","https://openalex.org/W2163614729","https://openalex.org/W2235688562","https://openalex.org/W2293844262","https://openalex.org/W2524946712","https://openalex.org/W2619479788","https://openalex.org/W2741990562","https://openalex.org/W2742231817","https://openalex.org/W2767023880","https://openalex.org/W2788106558","https://openalex.org/W2807842867","https://openalex.org/W2884552970","https://openalex.org/W2896549049","https://openalex.org/W2900331681","https://openalex.org/W2909859323","https://openalex.org/W2922153390","https://openalex.org/W2923764619","https://openalex.org/W2949506549","https://openalex.org/W2959995783","https://openalex.org/W2963022876","https://openalex.org/W2963077963","https://openalex.org/W2963165448","https://openalex.org/W2963207607","https://openalex.org/W2963518130","https://openalex.org/W2963564844","https://openalex.org/W2963747550","https://openalex.org/W2964153729","https://openalex.org/W2967536008","https://openalex.org/W2968611582","https://openalex.org/W2977400275","https://openalex.org/W2987280934","https://openalex.org/W2989948753","https://openalex.org/W2994081359","https://openalex.org/W2996067004","https://openalex.org/W3003867771","https://openalex.org/W3006861283","https://openalex.org/W3006890694","https://openalex.org/W3008906732","https://openalex.org/W3009460743","https://openalex.org/W3018252856","https://openalex.org/W3021154881","https://openalex.org/W3023518133","https://openalex.org/W3033802587","https://openalex.org/W3034704745","https://openalex.org/W3035252862","https://openalex.org/W3035701005","https://openalex.org/W3036723153","https://openalex.org/W3046374342","https://openalex.org/W3049413455","https://openalex.org/W3092198173","https://openalex.org/W3092477403","https://openalex.org/W3096003755","https://openalex.org/W3100156752","https://openalex.org/W3101482359","https://openalex.org/W3111350549","https://openalex.org/W3138817264","https://openalex.org/W3162003518","https://openalex.org/W3199276951","https://openalex.org/W3201018963","https://openalex.org/W3204815074","https://openalex.org/W4230107518","https://openalex.org/W4239202948","https://openalex.org/W4240385847","https://openalex.org/W4244398791","https://openalex.org/W4253069484","https://openalex.org/W4287865629","https://openalex.org/W4288413101"],"related_works":["https://openalex.org/W2579148721","https://openalex.org/W4387893611","https://openalex.org/W2347335694","https://openalex.org/W2091056927","https://openalex.org/W2067407580","https://openalex.org/W4317486777","https://openalex.org/W4389669152","https://openalex.org/W2038514069","https://openalex.org/W1967233468","https://openalex.org/W2009181529"],"abstract_inverted_index":{"Overparameterization":[0],"in":[1,160,220],"deep":[2,37],"learning":[3],"is":[4,153],"powerful:":[5],"Very":[6],"large":[7],"models":[8,27,106,200],"fit":[9],"the":[10,23,54,60,67,80,88,129,135,138,161,203,208,214,222,226,233,239],"training":[11,227,240],"data":[12,105,198,228],"perfectly":[13],"and":[14,65,140,168,184,207],"yet":[15],"often":[16],"generalize":[17],"well.":[18],"This":[19,259],"realization":[20],"brought":[21],"back":[22],"study":[24],"of":[25,56,62,87,131,137,174,187,210,238,247],"linear":[26,188],"for":[28,72,83,90,103,128,196,267],"regression,":[29],"including":[30,177],"ordinary":[31],"least":[32,181],"squares":[33,182],"(OLS),":[34],"which,":[35],"like":[36],"learning,":[38],"shows":[39],"a":[40,76,113,171],"\"double-descent\"":[41],"behavior:":[42],"(1)":[43],"The":[44,85],"risk":[45,68,82,89,130],"(expected":[46],"out-of-sample":[47],"prediction":[48],"error)":[49],"can":[50,92,108,229],"grow":[51],"arbitrarily":[52,154],"when":[53],"number":[55,61],"parameters":[57],"$p$":[58,71],"approaches":[59],"samples":[63],"$n$,":[64],"(2)":[66],"decreases":[69],"with":[70,95,112,170,213,244],"$p>n$,":[73],"sometimes":[74],"achieving":[75],"lower":[77],"value":[78],"than":[79],"lowest":[81],"$p<n$.":[84],"divergence":[86],"OLS":[91,152],"be":[93,110,265],"avoided":[94,111],"regularization.":[96],"In":[97],"this":[98],"work,":[99],"we":[100],"show":[101,145],"that":[102,146,218,254,261],"some":[104],"it":[107],"also":[109,118],"PCA-based":[114],"dimensionality":[115,147],"reduction":[116,148],"(PCA-OLS,":[117],"known":[119],"as":[120],"principal":[121,142],"component":[122],"regression).":[123],"We":[124,144,164,216],"provide":[125],"non-asymptotic":[126],"bounds":[127],"PCA-OLS":[132,166],"by":[133],"considering":[134],"alignments":[136],"population":[139,248],"empirical":[141],"components.":[143],"improves":[149],"robustness":[150],"while":[151],"susceptible":[155],"to":[156,201,205],"adversarial":[157],"attacks,":[158],"particularly":[159],"overparameterized":[162],"regime.":[163],"compare":[165],"theoretically":[167],"empirically":[169],"wide":[172],"range":[173],"projection-based":[175],"methods,":[176],"random":[178],"projections,":[179],"partial":[180],"(PLS),":[183],"certain":[185],"classes":[186],"two-layer":[189],"neural":[190],"networks.":[191],"These":[192],"comparisons":[193],"are":[194,235],"made":[195],"different":[197],"generation":[199],"assess":[202],"sensitivity":[204],"signal-to-noise":[206],"alignment":[209],"regression":[211],"coefficients":[212],"features.":[215],"find":[217],"methods":[219,231],"which":[221],"projection":[223],"depends":[224],"on":[225],"outperform":[230],"where":[232],"projections":[234],"chosen":[236],"independently":[237],"data,":[241],"even":[242],"those":[243],"oracle":[245],"knowledge":[246],"quantities,":[249],"another":[250],"seemingly":[251],"paradoxical":[252],"phenomenon":[253],"has":[255],"been":[256],"identified":[257],"previously.":[258],"suggests":[260],"overparameterization":[262],"may":[263],"not":[264],"necessary":[266],"good":[268],"generalization.":[269]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-12-07T00:00:00"}
