{"id":"https://openalex.org/W4412534222","doi":"https://doi.org/10.1088/2632-2153/adf278","title":"Feature learning and generalization in deep networks with orthogonal weights","display_name":"Feature learning and generalization in deep networks with orthogonal weights","publication_year":2025,"publication_date":"2025-07-21","ids":{"openalex":"https://openalex.org/W4412534222","doi":"https://doi.org/10.1088/2632-2153/adf278"},"language":"en","primary_location":{"id":"doi:10.1088/2632-2153/adf278","is_oa":true,"landing_page_url":"https://doi.org/10.1088/2632-2153/adf278","pdf_url":null,"source":{"id":"https://openalex.org/S4210200687","display_name":"Machine Learning Science and Technology","issn_l":"2632-2153","issn":["2632-2153"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320083","host_organization_name":"IOP Publishing","host_organization_lineage":["https://openalex.org/P4310320083","https://openalex.org/P4310311669"],"host_organization_lineage_names":["IOP Publishing","Institute of Physics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning: Science and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1088/2632-2153/adf278","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104631719","display_name":"Hannah Day","orcid":"https://orcid.org/0000-0002-4496-5600"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hannah Day","raw_affiliation_strings":["Department of Physics, University of Illinois Urbana-Champaign, Urbana, IL 61801, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-4496-5600","affiliations":[{"raw_affiliation_string":"Department of Physics, University of Illinois Urbana-Champaign, Urbana, IL 61801, United States of America","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108821003","display_name":"Yonatan Kahn","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]},{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]},{"id":"https://openalex.org/I4210127509","display_name":"Vector Institute","ror":"https://ror.org/03kqdja62","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210127509"]}],"countries":["CA","US"],"is_corresponding":true,"raw_author_name":"Yonatan Kahn","raw_affiliation_strings":["Vector Institute, Toronto, Ontario, Canada","Department of Physics, University of Illinois Urbana-Champaign, Urbana, IL 61801, United States of America","Department of Physics, University of Toronto"],"raw_orcid":"https://orcid.org/0000-0002-9379-1838","affiliations":[{"raw_affiliation_string":"Vector Institute, Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I4210127509"]},{"raw_affiliation_string":"Department of Physics, University of Illinois Urbana-Champaign, Urbana, IL 61801, United States of America","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"Department of Physics, University of Toronto","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077116838","display_name":"Daniel A. Roberts","orcid":"https://orcid.org/0000-0002-5755-2274"},"institutions":[{"id":"https://openalex.org/I4210151112","display_name":"Sequoia (United States)","ror":"https://ror.org/05y5sz883","country_code":"US","type":"company","lineage":["https://openalex.org/I4210151112"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Daniel A Roberts","raw_affiliation_strings":["Sequoia Capital, Menlo Park, CA 94025, United States of America","Center for Theoretical Physics and Department of Physics, Massachusetts Institute of Technology, Cambridge, MA 02139, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-5755-2274","affiliations":[{"raw_affiliation_string":"Sequoia Capital, Menlo Park, CA 94025, United States of America","institution_ids":["https://openalex.org/I4210151112"]},{"raw_affiliation_string":"Center for Theoretical Physics and Department of Physics, Massachusetts Institute of Technology, Cambridge, MA 02139, United States of America","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5077116838","https://openalex.org/A5104631719","https://openalex.org/A5108821003"],"corresponding_institution_ids":["https://openalex.org/I157725225","https://openalex.org/I185261750","https://openalex.org/I4210127509","https://openalex.org/I4210151112","https://openalex.org/I63966007"],"apc_list":{"value":1600,"currency":"GBP","value_usd":1962},"apc_paid":{"value":1600,"currency":"GBP","value_usd":1962},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08141923,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"6","issue":"3","first_page":"035027","last_page":"035027"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9501000046730042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9501000046730042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9394999742507935,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7530828714370728},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.7163985967636108},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.622486412525177},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5587612986564636},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5030052065849304},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4964659810066223},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.43442147970199585},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.32439088821411133},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.050290316343307495}],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7530828714370728},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.7163985967636108},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.622486412525177},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5587612986564636},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5030052065849304},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4964659810066223},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.43442147970199585},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32439088821411133},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.050290316343307495},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1088/2632-2153/adf278","is_oa":true,"landing_page_url":"https://doi.org/10.1088/2632-2153/adf278","pdf_url":null,"source":{"id":"https://openalex.org/S4210200687","display_name":"Machine Learning Science and Technology","issn_l":"2632-2153","issn":["2632-2153"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320083","host_organization_name":"IOP Publishing","host_organization_lineage":["https://openalex.org/P4310320083","https://openalex.org/P4310311669"],"host_organization_lineage_names":["IOP Publishing","Institute of Physics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning: Science and Technology","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:f5f4396e7b35429fab48088fe060a89b","is_oa":true,"landing_page_url":"https://doaj.org/article/f5f4396e7b35429fab48088fe060a89b","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning: Science and Technology, Vol 6, Iss 3, p 035027 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1088/2632-2153/adf278","is_oa":true,"landing_page_url":"https://doi.org/10.1088/2632-2153/adf278","pdf_url":null,"source":{"id":"https://openalex.org/S4210200687","display_name":"Machine Learning Science and Technology","issn_l":"2632-2153","issn":["2632-2153"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320083","host_organization_name":"IOP Publishing","host_organization_lineage":["https://openalex.org/P4310320083","https://openalex.org/P4310311669"],"host_organization_lineage_names":["IOP Publishing","Institute of Physics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning: Science and Technology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320337511","display_name":"High Energy Physics","ror":"https://ror.org/035m6g344"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1979729863","https://openalex.org/W1985692219","https://openalex.org/W2021115151","https://openalex.org/W3039141544","https://openalex.org/W3043622962","https://openalex.org/W3176723190","https://openalex.org/W3189987536","https://openalex.org/W4224115624","https://openalex.org/W4396692455","https://openalex.org/W6631190155","https://openalex.org/W6633240241","https://openalex.org/W6678818196","https://openalex.org/W6684191040","https://openalex.org/W6686373630","https://openalex.org/W6717556742","https://openalex.org/W6730172645","https://openalex.org/W6739901393","https://openalex.org/W6743969146","https://openalex.org/W6747683314","https://openalex.org/W6749180595","https://openalex.org/W6749897870","https://openalex.org/W6752356114","https://openalex.org/W6752495264","https://openalex.org/W6757817989","https://openalex.org/W6761496057","https://openalex.org/W6762312053","https://openalex.org/W6767489080","https://openalex.org/W6768348158","https://openalex.org/W6769099074","https://openalex.org/W6772166930","https://openalex.org/W6779792951","https://openalex.org/W6780226713","https://openalex.org/W6781459778","https://openalex.org/W6790951033","https://openalex.org/W6797283454","https://openalex.org/W6802500351","https://openalex.org/W6839702189","https://openalex.org/W6845253052","https://openalex.org/W6851846473","https://openalex.org/W6868961023","https://openalex.org/W6874454844","https://openalex.org/W6948022635","https://openalex.org/W6966937887"],"related_works":["https://openalex.org/W3162204513","https://openalex.org/W4375867731","https://openalex.org/W2371138613","https://openalex.org/W2905271011","https://openalex.org/W3164948662","https://openalex.org/W3048601286","https://openalex.org/W4289536128","https://openalex.org/W2965925734","https://openalex.org/W3153597579","https://openalex.org/W4298151006"],"abstract_inverted_index":{"Abstract":[0],"Fully-connected":[1],"deep":[2,169,193],"neural":[3,101],"networks":[4,32,51,62,170,196],"with":[5,39,52,63,171],"weights":[6,67],"initialized":[7,68],"from":[8,69],"independent":[9,81],"Gaussian":[10,146],"distributions":[11],"can":[12],"be":[13],"tuned":[14],"to":[15,55,84,174,188],"criticality,":[16],"which":[17,45,79],"prevents":[18],"the":[19,28,40,43,48,70,100,115,143,186,189,203],"exponential":[20],"growth":[21],"or":[22],"decay":[23],"of":[24,42,50,72,82,117,124,145,185,192],"signals":[25],"propagating":[26],"through":[27],"network.":[29],"However,":[30],"such":[31],"still":[33],"exhibit":[34],"fluctuations":[35,78],"that":[36,60,150],"grow":[37],"linearly":[38],"depth":[41,123,172],"network,":[44],"may":[46],"impair":[47],"training":[49,166],"width":[53],"comparable":[54,173],"depth.":[56],"We":[57,148,176],"show":[58],"analytically":[59],"rectangular":[61],"tanh":[64],"activations":[65],"and":[66,105,165,205],"ensemble":[71],"orthogonal":[73,195],"matrices":[74],"have":[75],"corresponding":[76],"preactivation":[77],"are":[80],"depth,":[83],"leading":[85,109],"order":[86,110],"in":[87,111,142,168],"inverse":[88,112],"width.":[89,175],"Moreover,":[90],"we":[91],"demonstrate":[92],"numerically":[93],"that,":[94],"at":[95,108,121],"initialization,":[96],"all":[97],"correlators":[98],"involving":[99],"tangent":[102],"kernel":[103],"(NTK)":[104],"its":[106],"descendants":[107],"width\u2014which":[113],"govern":[114],"evolution":[116],"observables":[118],"during":[119],"training\u2014saturate":[120],"a":[122],"<mml:math":[125],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[126],"overflow=\"scroll\">":[127],"<mml:mrow>":[128,129],"<mml:mo>\u223c</mml:mo>":[130],"</mml:mrow>":[131,133],"<mml:mn>20</mml:mn>":[132],"</mml:math>":[134],",":[135],"rather":[136],"than":[137],"growing":[138],"without":[139],"bound":[140],"as":[141],"case":[144],"initializations.":[147],"speculate":[149],"this":[151],"structure":[152],"preserves":[153],"finite-width":[154],"feature":[155],"learning":[156],"while":[157],"reducing":[158],"overall":[159],"noise,":[160],"thus":[161],"improving":[162],"both":[163],"generalization":[164],"speed":[167],"provide":[177],"some":[178],"experimental":[179],"justification":[180],"by":[181],"relating":[182],"empirical":[183],"measurements":[184],"NTK":[187],"superior":[190],"performance":[191],"non-linear":[194],"trained":[197],"under":[198],"full-batch":[199],"gradient":[200],"descent":[201],"on":[202],"MNIST":[204],"CIFAR-10":[206],"classification":[207],"tasks.":[208]},"counts_by_year":[],"updated_date":"2026-05-12T08:28:47.272897","created_date":"2025-10-10T00:00:00"}
