{"id":"https://openalex.org/W4225410174","doi":"https://doi.org/10.1109/isit50566.2022.9834388","title":"The Directional Bias Helps Stochastic Gradient Descent to Generalize in Kernel Regression Models","display_name":"The Directional Bias Helps Stochastic Gradient Descent to Generalize in Kernel Regression Models","publication_year":2022,"publication_date":"2022-06-26","ids":{"openalex":"https://openalex.org/W4225410174","doi":"https://doi.org/10.1109/isit50566.2022.9834388"},"language":"en","primary_location":{"id":"doi:10.1109/isit50566.2022.9834388","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit50566.2022.9834388","pdf_url":null,"source":{"id":"https://openalex.org/S4363604560","display_name":"2022 IEEE International Symposium on Information Theory (ISIT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2205.00061","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108303345","display_name":"Yiling Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yiling Luo","raw_affiliation_strings":["Georgia Institute of Technology,School of Industrial and Systems Engineering","School of Industrial and Systems Engineering, Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,School of Industrial and Systems Engineering","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"School of Industrial and Systems Engineering, Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014880531","display_name":"Xiaoming Huo","orcid":"https://orcid.org/0000-0003-0101-1206"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoming Huo","raw_affiliation_strings":["Georgia Institute of Technology,School of Industrial and Systems Engineering","School of Industrial and Systems Engineering, Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,School of Industrial and Systems Engineering","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"School of Industrial and Systems Engineering, Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005740286","display_name":"Yajun Mei","orcid":"https://orcid.org/0000-0002-1015-990X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yajun Mei","raw_affiliation_strings":["Georgia Institute of Technology,School of Industrial and Systems Engineering","School of Industrial and Systems Engineering, Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,School of Industrial and Systems Engineering","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"School of Industrial and Systems Engineering, Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5108303345"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":0.1046,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.24609106,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"678","last_page":"683"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.7650723457336426},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.6513463854789734},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.650907039642334},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.5765601396560669},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.568328320980072},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.5519416928291321},{"id":"https://openalex.org/keywords/nonparametric-regression","display_name":"Nonparametric regression","score":0.5215089917182922},{"id":"https://openalex.org/keywords/kernel-regression","display_name":"Kernel regression","score":0.5016069412231445},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.47859808802604675},{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.4782071113586426},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3843848705291748},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3701971769332886},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.31382542848587036},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.2793782353401184},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2515791058540344},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.13796767592430115},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.06522998213768005}],"concepts":[{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.7650723457336426},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6513463854789734},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.650907039642334},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.5765601396560669},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.568328320980072},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.5519416928291321},{"id":"https://openalex.org/C74127309","wikidata":"https://www.wikidata.org/wiki/Q3455886","display_name":"Nonparametric regression","level":3,"score":0.5215089917182922},{"id":"https://openalex.org/C200695384","wikidata":"https://www.wikidata.org/wiki/Q1739319","display_name":"Kernel regression","level":3,"score":0.5016069412231445},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.47859808802604675},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.4782071113586426},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3843848705291748},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3701971769332886},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.31382542848587036},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.2793782353401184},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2515791058540344},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.13796767592430115},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.06522998213768005},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/isit50566.2022.9834388","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit50566.2022.9834388","pdf_url":null,"source":{"id":"https://openalex.org/S4363604560","display_name":"2022 IEEE International Symposium on Information Theory (ISIT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2205.00061","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.00061","pdf_url":"https://arxiv.org/pdf/2205.00061","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2205.00061","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.00061","pdf_url":"https://arxiv.org/pdf/2205.00061","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W1770470487","https://openalex.org/W1968326035","https://openalex.org/W2006262236","https://openalex.org/W2034978228","https://openalex.org/W2121950477","https://openalex.org/W2124958607","https://openalex.org/W2129250947","https://openalex.org/W2146766088","https://openalex.org/W2153542182","https://openalex.org/W2155561932","https://openalex.org/W2189007323","https://openalex.org/W2194775991","https://openalex.org/W2591973785","https://openalex.org/W2767197849","https://openalex.org/W2776610675","https://openalex.org/W2804098832","https://openalex.org/W2809090039","https://openalex.org/W2886836477","https://openalex.org/W2924791586","https://openalex.org/W2964198904","https://openalex.org/W2996279083","https://openalex.org/W3011558670","https://openalex.org/W3104969455","https://openalex.org/W3105340263","https://openalex.org/W3135166055","https://openalex.org/W3148738583","https://openalex.org/W3162775926","https://openalex.org/W3191067499","https://openalex.org/W3191617243","https://openalex.org/W4225410174","https://openalex.org/W4250029775","https://openalex.org/W4287212796","https://openalex.org/W4288110775","https://openalex.org/W4288365839","https://openalex.org/W4289435181","https://openalex.org/W4298876635","https://openalex.org/W4301881812","https://openalex.org/W4366084840","https://openalex.org/W6613812410","https://openalex.org/W6687423640","https://openalex.org/W6745428499","https://openalex.org/W6746900977","https://openalex.org/W6747679287","https://openalex.org/W6752495264","https://openalex.org/W6762034364","https://openalex.org/W6771651061","https://openalex.org/W6771840336","https://openalex.org/W6775078174","https://openalex.org/W6791777456"],"related_works":["https://openalex.org/W4206903459","https://openalex.org/W2754816816","https://openalex.org/W4366280654","https://openalex.org/W3160167280","https://openalex.org/W4231621013","https://openalex.org/W4362706668","https://openalex.org/W3008318776","https://openalex.org/W1977633006","https://openalex.org/W2041416246","https://openalex.org/W3020853991"],"abstract_inverted_index":{"We":[0],"study":[1,125],"the":[2,25,32,48,51,56,60,65,77,82,91,130],"Stochastic":[3],"Gradient":[4,66],"Descent":[5,67],"(SGD)":[6],"algorithm":[7],"in":[8,13,24],"nonparametric":[9],"statistics:":[10],"kernel":[11,33],"regression":[12,27],"particular.":[14],"The":[15,112],"directional":[16,92],"bias":[17,93],"property":[18],"of":[19,50,59,114],"SGD,":[20],"which":[21],"is":[22,29,117,127],"known":[23],"linear":[26],"setting,":[28],"generalized":[30],"to":[31,55,81,89],"regression.":[34],"More":[35],"specifically,":[36],"we":[37],"prove":[38],"that":[39,53,79,126],"SGD":[40],"with":[41,69],"moderate":[42,71],"and":[43,122],"annealing":[44],"step-size":[45,74],"converges":[46,75],"along":[47,76],"direction":[49,78],"eigenvector":[52],"corresponds":[54,80],"largest":[57],"eigenvalue":[58],"Gram":[61],"matrix.":[62],"In":[63],"addition,":[64],"(GD)":[68],"a":[70,103,109,123],"or":[72],"small":[73],"smallest":[83],"eigenvalue.":[84],"These":[85],"facts":[86],"are":[87],"referred":[88],"as":[90],"properties;":[94],"they":[95],"may":[96],"interpret":[97],"how":[98],"an":[99],"SGD-computed":[100],"estimator":[101],"has":[102],"potentially":[104],"smaller":[105],"generalization":[106],"error":[107],"than":[108],"GD-computed":[110],"estimator.":[111],"application":[113],"our":[115],"theory":[116],"demonstrated":[118],"by":[119],"simulation":[120],"studies":[121],"case":[124],"based":[128],"on":[129],"FashionMNIST":[131],"dataset.":[132]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
