{"id":"https://openalex.org/W3014013215","doi":"https://doi.org/10.1109/jsait.2020.2983192","title":"A Fourier-Based Approach to Generalization and Optimization in Deep Learning","display_name":"A Fourier-Based Approach to Generalization and Optimization in Deep Learning","publication_year":2020,"publication_date":"2020-03-26","ids":{"openalex":"https://openalex.org/W3014013215","doi":"https://doi.org/10.1109/jsait.2020.2983192","mag":"3014013215"},"language":"en","primary_location":{"id":"doi:10.1109/jsait.2020.2983192","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jsait.2020.2983192","pdf_url":null,"source":{"id":"https://openalex.org/S4210211895","display_name":"IEEE Journal on Selected Areas in Information Theory","issn_l":"2641-8770","issn":["2641-8770"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal on Selected Areas in Information Theory","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017160178","display_name":"Farzan Farnia","orcid":"https://orcid.org/0000-0002-6049-9232"},"institutions":[{"id":"https://openalex.org/I4210143601","display_name":"Decision Systems (United States)","ror":"https://ror.org/0434dpa13","country_code":"US","type":"company","lineage":["https://openalex.org/I4210143601"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]},{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Farzan Farnia","raw_affiliation_strings":["Department of Electrical Engineering, Stanford University, Stanford, USA","Laboratory for Information and Decision Systems, Massachusetts Institute of Technology, Cambridge, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Stanford University, Stanford, USA","institution_ids":["https://openalex.org/I97018004"]},{"raw_affiliation_string":"Laboratory for Information and Decision Systems, Massachusetts Institute of Technology, Cambridge, USA","institution_ids":["https://openalex.org/I4210143601","https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047050773","display_name":"Jesse M. Zhang","orcid":"https://orcid.org/0000-0002-9970-0693"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jesse M. Zhang","raw_affiliation_strings":["Beacons AI, San Francisco, USA","Department of Electrical Engineering, Stanford University, Stanford, USA"],"affiliations":[{"raw_affiliation_string":"Beacons AI, San Francisco, USA","institution_ids":[]},{"raw_affiliation_string":"Department of Electrical Engineering, Stanford University, Stanford, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024072566","display_name":"David Tse","orcid":"https://orcid.org/0000-0003-1460-5900"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David N. Tse","raw_affiliation_strings":["Department of Electrical Engineering, Stanford University, Stanford, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Stanford University, Stanford, USA","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5017160178"],"corresponding_institution_ids":["https://openalex.org/I4210143601","https://openalex.org/I63966007","https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":0.5302,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.71642898,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"1","issue":"1","first_page":"145","last_page":"156"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.8004167079925537},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6819437742233276},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6636523008346558},{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.6223851442337036},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.5990504622459412},{"id":"https://openalex.org/keywords/activation-function","display_name":"Activation function","score":0.5585564970970154},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.528437614440918},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.5230603814125061},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.49711373448371887},{"id":"https://openalex.org/keywords/fourier-transform","display_name":"Fourier transform","score":0.47931328415870667},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.4666910767555237},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.456983357667923},{"id":"https://openalex.org/keywords/fourier-series","display_name":"Fourier series","score":0.452706515789032},{"id":"https://openalex.org/keywords/norm","display_name":"Norm (philosophy)","score":0.4502132534980774},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4175446629524231},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.41683629155158997},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3728626072406769},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.13088583946228027}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.8004167079925537},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6819437742233276},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6636523008346558},{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.6223851442337036},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.5990504622459412},{"id":"https://openalex.org/C38365724","wikidata":"https://www.wikidata.org/wiki/Q4677469","display_name":"Activation function","level":3,"score":0.5585564970970154},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.528437614440918},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.5230603814125061},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.49711373448371887},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.47931328415870667},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.4666910767555237},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.456983357667923},{"id":"https://openalex.org/C207864730","wikidata":"https://www.wikidata.org/wiki/Q179467","display_name":"Fourier series","level":2,"score":0.452706515789032},{"id":"https://openalex.org/C191795146","wikidata":"https://www.wikidata.org/wiki/Q3878446","display_name":"Norm (philosophy)","level":2,"score":0.4502132534980774},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4175446629524231},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.41683629155158997},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3728626072406769},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.13088583946228027},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jsait.2020.2983192","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jsait.2020.2983192","pdf_url":null,"source":{"id":"https://openalex.org/S4210211895","display_name":"IEEE Journal on Selected Areas in Information Theory","issn_l":"2641-8770","issn":["2641-8770"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal on Selected Areas in Information Theory","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"No poverty","score":0.6100000143051147,"id":"https://metadata.un.org/sdg/1"}],"awards":[{"id":"https://openalex.org/G1828296274","display_name":null,"funder_award_id":"CCF-1563098","funder_id":"https://openalex.org/F4320335353","funder_display_name":"National Science Foundation of Sri Lanka"}],"funders":[{"id":"https://openalex.org/F4320335353","display_name":"National Science Foundation of Sri Lanka","ror":"https://ror.org/010xaa060"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":92,"referenced_works":["https://openalex.org/W577198184","https://openalex.org/W607505555","https://openalex.org/W1518544712","https://openalex.org/W1542886316","https://openalex.org/W1720114023","https://openalex.org/W1811750039","https://openalex.org/W1992244412","https://openalex.org/W2103496339","https://openalex.org/W2113517874","https://openalex.org/W2117130368","https://openalex.org/W2123068691","https://openalex.org/W2144902422","https://openalex.org/W2156909104","https://openalex.org/W2160354932","https://openalex.org/W2160815625","https://openalex.org/W2163605009","https://openalex.org/W2166116275","https://openalex.org/W2253400648","https://openalex.org/W2514392868","https://openalex.org/W2553079770","https://openalex.org/W2579923771","https://openalex.org/W2592091545","https://openalex.org/W2623127191","https://openalex.org/W2732724430","https://openalex.org/W2739309726","https://openalex.org/W2741952635","https://openalex.org/W2800415562","https://openalex.org/W2803403308","https://openalex.org/W2807299122","https://openalex.org/W2884447957","https://openalex.org/W2919115771","https://openalex.org/W2923764619","https://openalex.org/W2950928354","https://openalex.org/W2951533962","https://openalex.org/W2951548721","https://openalex.org/W2962742960","https://openalex.org/W2962857907","https://openalex.org/W2963122491","https://openalex.org/W2963236897","https://openalex.org/W2963285844","https://openalex.org/W2963325933","https://openalex.org/W2963664410","https://openalex.org/W2963739978","https://openalex.org/W2963794891","https://openalex.org/W2963959597","https://openalex.org/W2967536008","https://openalex.org/W2972810859","https://openalex.org/W2974935988","https://openalex.org/W3014316192","https://openalex.org/W3018252856","https://openalex.org/W3118608800","https://openalex.org/W3137695714","https://openalex.org/W4205575619","https://openalex.org/W4206686222","https://openalex.org/W4236362309","https://openalex.org/W4285719527","https://openalex.org/W4288413101","https://openalex.org/W4294560781","https://openalex.org/W4297926695","https://openalex.org/W4298362508","https://openalex.org/W4300686774","https://openalex.org/W6616617004","https://openalex.org/W6631126388","https://openalex.org/W6637414576","https://openalex.org/W6638214083","https://openalex.org/W6676797890","https://openalex.org/W6678800051","https://openalex.org/W6681302627","https://openalex.org/W6683823733","https://openalex.org/W6684191040","https://openalex.org/W6691187937","https://openalex.org/W6725287276","https://openalex.org/W6726353972","https://openalex.org/W6726983090","https://openalex.org/W6730249524","https://openalex.org/W6732517885","https://openalex.org/W6733962499","https://openalex.org/W6736583452","https://openalex.org/W6739659843","https://openalex.org/W6740483536","https://openalex.org/W6741653254","https://openalex.org/W6741844306","https://openalex.org/W6748600614","https://openalex.org/W6751347407","https://openalex.org/W6752343131","https://openalex.org/W6753438819","https://openalex.org/W6760498417","https://openalex.org/W6763485134","https://openalex.org/W6765167281","https://openalex.org/W6767329639","https://openalex.org/W6767955557","https://openalex.org/W7034108470"],"related_works":["https://openalex.org/W3214206881","https://openalex.org/W2895097035","https://openalex.org/W4206903459","https://openalex.org/W2754816816","https://openalex.org/W2896257321","https://openalex.org/W4366280654","https://openalex.org/W3160167280","https://openalex.org/W4362706668","https://openalex.org/W4231621013","https://openalex.org/W3171021120"],"abstract_inverted_index":{"The":[0],"success":[1],"of":[2,68,90,144,151,164],"deep":[3],"neural":[4,22,45,73,131,153,171],"networks":[5,23,46,74,132,154],"stems":[6],"from":[7],"their":[8],"ability":[9],"to":[10,63,109,129],"generalize":[11],"well":[12],"on":[13],"real":[14],"data;":[15],"however,":[16],"et":[17],"al.":[18],"have":[19],"observed":[20],"that":[21,82,162,174],"can":[24,107,175],"easily":[25],"overfit":[26],"randomly-generated":[27],"labels.":[28,187],"This":[29,137],"observation":[30],"highlights":[31],"the":[32,65,87,103,149,165],"following":[33],"question:":[34],"why":[35],"do":[36],"gradient":[37,104],"methods":[38,70],"succeed":[39],"in":[40,83,170],"finding":[41],"generalizable":[42],"solutions":[43,50,173],"for":[44,124,147],"while":[47,183],"there":[48],"exist":[49],"with":[51,75,112,133,155],"poor":[52],"generalization":[53,66,114,121,138],"behavior?":[54],"In":[55],"this":[56],"work,":[57],"we":[58],"use":[59],"a":[60,119,141],"Fourier-based":[61,120],"approach":[62],"study":[64],"properties":[67,95],"gradient-based":[69],"over":[71],"2-layer":[72,130,152],"band-limited":[76,125],"activation":[77,135,157],"functions.":[78,136,158],"Our":[79],"results":[80,169],"indicate":[81],"such":[84],"settings":[85],"if":[86],"underlying":[88],"distribution":[89],"data":[91],"enjoys":[92],"nice":[93,113],"Fourier":[94,100],"including":[96],"bandlimitedness":[97],"and":[98],"bounded":[99],"norm,":[101],"then":[102],"descent":[105],"method":[106],"converge":[108],"local":[110],"minima":[111],"behavior.":[115],"We":[116,159],"also":[117],"establish":[118],"error":[122],"bound":[123,139],"function":[126],"spaces,":[127],"applicable":[128],"general":[134],"motivates":[140],"grouped":[142],"version":[143],"path":[145,167],"norms":[146,168],"measuring":[148],"complexity":[150],"ReLU-type":[156],"empirically":[160],"demonstrate":[161],"regularization":[163],"group":[166],"network":[172],"fit":[176],"true":[177],"labels":[178],"without":[179],"losing":[180],"test":[181],"accuracy":[182],"not":[184],"overfitting":[185],"random":[186]},"counts_by_year":[{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
