{"id":"https://openalex.org/W3189987536","doi":"https://doi.org/10.24963/ijcai.2021/355","title":"On the Neural Tangent Kernel of Deep Networks with Orthogonal Initialization","display_name":"On the Neural Tangent Kernel of Deep Networks with Orthogonal Initialization","publication_year":2021,"publication_date":"2021-08-01","ids":{"openalex":"https://openalex.org/W3189987536","doi":"https://doi.org/10.24963/ijcai.2021/355","mag":"3189987536"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2021/355","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/355","pdf_url":"https://www.ijcai.org/proceedings/2021/0355.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2021/0355.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101399361","display_name":"Wei Huang","orcid":"https://orcid.org/0000-0002-4817-8858"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Wei Huang","raw_affiliation_strings":["University of Technology, Sydney","University of Technology Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of Technology, Sydney","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080452660","display_name":"Weitao Du","orcid":"https://orcid.org/0000-0001-7643-4671"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weitao Du","raw_affiliation_strings":["Northwestern University","Northwestern University, USA"],"affiliations":[{"raw_affiliation_string":"Northwestern University","institution_ids":[]},{"raw_affiliation_string":"Northwestern University, USA","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073709711","display_name":"Richard Yi Da Xu","orcid":"https://orcid.org/0000-0003-2080-4762"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Richard Yi Da Xu","raw_affiliation_strings":["University of Technology, Sydney","University of Technology Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of Technology, Sydney","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101399361"],"corresponding_institution_ids":["https://openalex.org/I114017466"],"apc_list":null,"apc_paid":null,"fwci":1.1683,"has_fulltext":true,"cited_by_count":17,"citation_normalized_percentile":{"value":0.78453883,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2577","last_page":"2583"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.8530831933021545},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6247018575668335},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6210748553276062},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5663750171661377},{"id":"https://openalex.org/keywords/tangent","display_name":"Tangent","score":0.5303868651390076},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5219337940216064},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.48948609828948975},{"id":"https://openalex.org/keywords/orthogonal-transformation","display_name":"Orthogonal transformation","score":0.476940780878067},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4602569043636322},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.42608579993247986},{"id":"https://openalex.org/keywords/orthogonality","display_name":"Orthogonality","score":0.42019155621528625},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.4161009192466736},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35257112979888916},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.34995394945144653},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.32936182618141174},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.15905866026878357},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.10204628109931946},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08407625555992126}],"concepts":[{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.8530831933021545},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6247018575668335},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6210748553276062},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5663750171661377},{"id":"https://openalex.org/C138187205","wikidata":"https://www.wikidata.org/wiki/Q131251","display_name":"Tangent","level":2,"score":0.5303868651390076},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5219337940216064},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.48948609828948975},{"id":"https://openalex.org/C54940322","wikidata":"https://www.wikidata.org/wiki/Q3997740","display_name":"Orthogonal transformation","level":2,"score":0.476940780878067},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4602569043636322},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.42608579993247986},{"id":"https://openalex.org/C17137986","wikidata":"https://www.wikidata.org/wiki/Q215067","display_name":"Orthogonality","level":2,"score":0.42019155621528625},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.4161009192466736},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35257112979888916},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.34995394945144653},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.32936182618141174},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.15905866026878357},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.10204628109931946},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08407625555992126},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2021/355","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/355","pdf_url":"https://www.ijcai.org/proceedings/2021/0355.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2021/355","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/355","pdf_url":"https://www.ijcai.org/proceedings/2021/0355.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3189987536.pdf","grobid_xml":"https://content.openalex.org/works/W3189987536.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W2125930537","https://openalex.org/W2145339207","https://openalex.org/W2423689290","https://openalex.org/W2753358588","https://openalex.org/W2766678531","https://openalex.org/W2808197691","https://openalex.org/W2809090039","https://openalex.org/W2887597596","https://openalex.org/W2896457183","https://openalex.org/W2899748887","https://openalex.org/W2912322140","https://openalex.org/W2913473169","https://openalex.org/W2942052807","https://openalex.org/W2952204734","https://openalex.org/W2958636547","https://openalex.org/W2964065616","https://openalex.org/W2966173098","https://openalex.org/W2971043187","https://openalex.org/W2981407587","https://openalex.org/W2995941914","https://openalex.org/W2998843058","https://openalex.org/W3034522462","https://openalex.org/W3038074040","https://openalex.org/W3089353926","https://openalex.org/W4236077405","https://openalex.org/W4287829020","https://openalex.org/W4287902508","https://openalex.org/W4289436733","https://openalex.org/W4289494182","https://openalex.org/W4294205985","https://openalex.org/W4295151193","https://openalex.org/W6803771590","https://openalex.org/W6863994431"],"related_works":["https://openalex.org/W4226299596","https://openalex.org/W4309434778","https://openalex.org/W3214759249","https://openalex.org/W2984511682","https://openalex.org/W4283773090","https://openalex.org/W2183117064","https://openalex.org/W2146809663","https://openalex.org/W4232228426","https://openalex.org/W4246218487","https://openalex.org/W2365891864"],"abstract_inverted_index":{"The":[0,17],"prevailing":[1,159],"thinking":[2],"is":[3,37,50,124],"that":[4,22,105,142,184],"orthogonal":[5,25,89,116,144,198],"weights":[6,112],"are":[7,118,188],"crucial":[8],"to":[9,39,110,115,157,163,190],"enforcing":[10],"dynamical":[11,47],"isometry":[12,48],"and":[13,83,101,113],"speeding":[14],"up":[15,148],"training.":[16],"increase":[18],"in":[19,27,150,195],"learning":[20,203,208],"speed":[21,147,204],"results":[23],"from":[24],"initialization":[26,90,145,199],"linear":[28,193],"networks":[29,44,72],"has":[30],"been":[31,60],"well-proven.":[32],"However,":[33],"while":[34],"the":[35,46,52,68,121,129,143,151,158,179,186,202],"same":[36],"believed":[38],"also":[40],"hold":[41],"for":[42],"nonlinear":[43,196],"when":[45,120,185],"condition":[49],"satisfied,":[51],"training":[53,149],"dynamics":[54,69],"behind":[55],"this":[56,64],"contention":[57],"have":[58],"not":[59],"thoroughly":[61],"explored.":[62],"In":[63,161],"work,":[65],"we":[66,103,172],"study":[67],"of":[70,76,99,131],"ultra-wide":[71],"across":[73],"a":[74,97,174,192,206],"range":[75],"architectures,":[77],"including":[78],"Fully":[79],"Connected":[80],"Networks":[81,86],"(FCNs)":[82],"Convolutional":[84],"Neural":[85],"(CNNs)":[87],"with":[88,205],"via":[91],"neural":[92],"tangent":[93],"kernel":[94],"(NTK).":[95],"Through":[96],"series":[98],"propositions":[100],"lemmas,":[102],"prove":[104],"two":[106],"NTKs,":[107],"one":[108,114],"corresponding":[109],"Gaussian":[111],"weights,":[117],"equal":[119],"network":[122,135],"width":[123],"infinite.":[125],"Further,":[126],"during":[127],"training,":[128,171],"NTK":[130,152,180],"an":[132],"orthogonally-initialized":[133],"infinite-width":[134],"should":[136],"theoretically":[137],"remain":[138],"constant.":[139],"This":[140],"suggests":[141],"cannot":[146],"(lazy":[153],"training)":[154],"regime,":[155],"contrary":[156],"thoughts.":[160],"order":[162],"explore":[164],"under":[165],"what":[166],"circumstances":[167],"can":[168,200],"orthogonality":[169],"accelerate":[170],"conduct":[173],"thorough":[175],"empirical":[176],"investigation":[177],"outside":[178],"regime.":[181],"We":[182],"find":[183],"hyper-parameters":[187],"set":[189],"achieve":[191],"regime":[194],"activation,":[197],"improve":[201],"large":[207,211],"rate":[209],"or":[210],"depth.":[212]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
