{"id":"https://openalex.org/W4309804320","doi":"https://doi.org/10.48550/arxiv.2211.11567","title":"Neural networks trained with SGD learn distributions of increasing complexity","display_name":"Neural networks trained with SGD learn distributions of increasing complexity","publication_year":2022,"publication_date":"2022-11-21","ids":{"openalex":"https://openalex.org/W4309804320","doi":"https://doi.org/10.48550/arxiv.2211.11567"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2211.11567","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.11567","pdf_url":"https://arxiv.org/pdf/2211.11567","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2211.11567","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046159570","display_name":"Maria Refinetti","orcid":"https://orcid.org/0000-0002-0148-8074"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Refinetti, Maria","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047108263","display_name":"Alessandro Ingrosso","orcid":"https://orcid.org/0000-0001-5430-7559"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ingrosso, Alessandro","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5057039281","display_name":"Sebastian Goldt","orcid":"https://orcid.org/0000-0002-5799-7644"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goldt, Sebastian","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5046159570"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simplicity","display_name":"Simplicity","score":0.7154748439788818},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6884716749191284},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6684925556182861},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.6476391553878784},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6301129460334778},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.5793645977973938},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5606734156608582},{"id":"https://openalex.org/keywords/covariance","display_name":"Covariance","score":0.5248753428459167},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.442921906709671},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.43113645911216736},{"id":"https://openalex.org/keywords/universality","display_name":"Universality (dynamical systems)","score":0.4140707552433014},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.23106259107589722},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.09568941593170166}],"concepts":[{"id":"https://openalex.org/C2776372474","wikidata":"https://www.wikidata.org/wiki/Q508291","display_name":"Simplicity","level":2,"score":0.7154748439788818},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6884716749191284},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6684925556182861},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.6476391553878784},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6301129460334778},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.5793645977973938},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5606734156608582},{"id":"https://openalex.org/C178650346","wikidata":"https://www.wikidata.org/wiki/Q201984","display_name":"Covariance","level":2,"score":0.5248753428459167},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.442921906709671},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.43113645911216736},{"id":"https://openalex.org/C183992945","wikidata":"https://www.wikidata.org/wiki/Q2495574","display_name":"Universality (dynamical systems)","level":2,"score":0.4140707552433014},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23106259107589722},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.09568941593170166},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2211.11567","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.11567","pdf_url":"https://arxiv.org/pdf/2211.11567","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2211.11567","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2211.11567","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2211.11567","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.11567","pdf_url":"https://arxiv.org/pdf/2211.11567","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4362597605","https://openalex.org/W1574414179","https://openalex.org/W2368019753","https://openalex.org/W3009056573","https://openalex.org/W2922073769","https://openalex.org/W4297676672","https://openalex.org/W4281702477","https://openalex.org/W2333930193","https://openalex.org/W2737356002","https://openalex.org/W4378510483"],"abstract_inverted_index":{"The":[0],"ability":[1],"of":[2,112,127,152,165],"deep":[3,128],"neural":[4,27,73,114],"networks":[5,28,74,130,144],"to":[6,154],"generalise":[7],"well":[8],"even":[9,141],"when":[10],"they":[11],"interpolate":[12],"their":[13,82],"training":[14],"data":[15,47],"has":[16],"been":[17],"explained":[18],"using":[19,76,84],"various":[20],"\"simplicity":[21],"biases\".":[22],"These":[23],"theories":[24],"postulate":[25],"that":[26,72,139],"avoid":[29],"overfitting":[30],"by":[31],"first":[32,101],"learning":[33,41],"simple":[34],"functions,":[35],"say":[36],"a":[37,53,109,113,125],"linear":[38],"classifier,":[39],"before":[40],"more":[42],"complex,":[43],"non-linear":[44],"functions.":[45],"Meanwhile,":[46],"structure":[48],"is":[49,65],"also":[50],"recognised":[51],"as":[52],"key":[54],"ingredient":[55],"for":[56,162],"good":[57],"generalisation,":[58],"yet":[59,67],"its":[60,160],"role":[61],"in":[62,108,124,143,168],"simplicity":[63,105,156],"biases":[64,157],"not":[66],"understood.":[68],"Here,":[69],"we":[70],"show":[71,138],"trained":[75,116,134],"stochastic":[77],"gradient":[78],"descent":[79],"initially":[80],"classify":[81],"inputs":[83],"lower-order":[85],"input":[86],"statistics,":[87],"like":[88],"mean":[89],"and":[90,92,131,137,158],"covariance,":[91],"exploit":[93],"higher-order":[94],"statistics":[95],"only":[96],"later":[97],"during":[98],"training.":[99],"We":[100,120,148],"demonstrate":[102,122],"this":[103],"distributional":[104],"bias":[106],"(DSB)":[107],"solvable":[110],"model":[111],"network":[115],"on":[117,135,146],"synthetic":[118],"data.":[119],"empirically":[121],"DSB":[123,153],"range":[126],"convolutional":[129],"visual":[132],"transformers":[133],"CIFAR10,":[136],"it":[140],"holds":[142],"pre-trained":[145],"ImageNet.":[147],"discuss":[149],"the":[150,163],"relation":[151],"other":[155],"consider":[159],"implications":[161],"principle":[164],"Gaussian":[166],"universality":[167],"learning.":[169]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
