{"id":"https://openalex.org/W4385482883","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191204","title":"Gradient Descent Optimizes Normalization-Free ResNets","display_name":"Gradient Descent Optimizes Normalization-Free ResNets","publication_year":2023,"publication_date":"2023-06-18","ids":{"openalex":"https://openalex.org/W4385482883","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191204"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn54540.2023.10191204","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn54540.2023.10191204","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065652013","display_name":"Zongpeng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210137189","display_name":"Center for Interdisciplinary Studies","ror":"https://ror.org/03whr7s66","country_code":"IN","type":"facility","lineage":["https://openalex.org/I4210137189"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN","IN"],"is_corresponding":true,"raw_author_name":"Zongpeng Zhang","raw_affiliation_strings":["Academy for Advanced Interdisciplinary Studies, Peking University,Center for Data Science","National Key Lab. of General AI, School of Intelligence Science and Technology, Peking University","Center for Data Science, Academy for Advanced Interdisciplinary Studies, Peking University"],"affiliations":[{"raw_affiliation_string":"Academy for Advanced Interdisciplinary Studies, Peking University,Center for Data Science","institution_ids":["https://openalex.org/I4210137189","https://openalex.org/I20231570"]},{"raw_affiliation_string":"National Key Lab. of General AI, School of Intelligence Science and Technology, Peking University","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Center for Data Science, Academy for Advanced Interdisciplinary Studies, Peking University","institution_ids":["https://openalex.org/I4210137189","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018304811","display_name":"Zenan Ling","orcid":"https://orcid.org/0000-0001-8047-0253"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zenan Ling","raw_affiliation_strings":["School of Intelligence Science and Technology, Peking University,National Key Lab. of General AI","EIC, Huazhong University of Science and Technology","National Key Lab. of General AI, School of Intelligence Science and Technology, Peking University"],"affiliations":[{"raw_affiliation_string":"School of Intelligence Science and Technology, Peking University,National Key Lab. of General AI","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"EIC, Huazhong University of Science and Technology","institution_ids":["https://openalex.org/I47720641"]},{"raw_affiliation_string":"National Key Lab. of General AI, School of Intelligence Science and Technology, Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101677729","display_name":"Tong Lin","orcid":"https://orcid.org/0000-0002-8112-0439"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tong Lin","raw_affiliation_strings":["School of Intelligence Science and Technology, Peking University,National Key Lab. of General AI","National Key Lab. of General AI, School of Intelligence Science and Technology, Peking University"],"affiliations":[{"raw_affiliation_string":"School of Intelligence Science and Technology, Peking University,National Key Lab. of General AI","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"National Key Lab. of General AI, School of Intelligence Science and Technology, Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016399094","display_name":"Zhouchen Lin","orcid":"https://orcid.org/0000-0003-1493-7569"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhouchen Lin","raw_affiliation_strings":["School of Intelligence Science and Technology, Peking University,National Key Lab. of General AI","National Key Lab. of General AI, School of Intelligence Science and Technology, Peking University","Institute for Artificial Intelligence, Peking University","Peng Cheng Laboratory"],"affiliations":[{"raw_affiliation_string":"School of Intelligence Science and Technology, Peking University,National Key Lab. of General AI","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"National Key Lab. of General AI, School of Intelligence Science and Technology, Peking University","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Institute for Artificial Intelligence, Peking University","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Peng Cheng Laboratory","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5065652013"],"corresponding_institution_ids":["https://openalex.org/I20231570","https://openalex.org/I4210137189"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08126234,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"32","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.871179461479187},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.8183481693267822},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.7543151378631592},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7078526020050049},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6921459436416626},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.49555671215057373},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.49372729659080505},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.486537367105484},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39649051427841187},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.08505332469940186}],"concepts":[{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.871179461479187},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.8183481693267822},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.7543151378631592},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7078526020050049},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6921459436416626},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49555671215057373},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.49372729659080505},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.486537367105484},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39649051427841187},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.08505332469940186},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn54540.2023.10191204","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn54540.2023.10191204","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3896424926","display_name":null,"funder_award_id":"62276004","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G738097966","display_name":null,"funder_award_id":"2022ZD0160302","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1677182931","https://openalex.org/W1993882792","https://openalex.org/W2112796928","https://openalex.org/W2117130368","https://openalex.org/W2194775991","https://openalex.org/W2302255633","https://openalex.org/W2750854727","https://openalex.org/W2921467542","https://openalex.org/W2966173098","https://openalex.org/W2970987681","https://openalex.org/W2981407587","https://openalex.org/W3010768098","https://openalex.org/W3035794324","https://openalex.org/W3118608800","https://openalex.org/W4255949318","https://openalex.org/W4287866983","https://openalex.org/W4293455017","https://openalex.org/W4297779039","https://openalex.org/W6631943919","https://openalex.org/W6638667902","https://openalex.org/W6734171162","https://openalex.org/W6743679106","https://openalex.org/W6751923770","https://openalex.org/W6755150206","https://openalex.org/W6755310813","https://openalex.org/W6756001544","https://openalex.org/W6756091659","https://openalex.org/W6757468910","https://openalex.org/W6758094429","https://openalex.org/W6760242218","https://openalex.org/W6763552679","https://openalex.org/W6767486985","https://openalex.org/W6772121514","https://openalex.org/W6774133109","https://openalex.org/W6774776664","https://openalex.org/W6779724054","https://openalex.org/W6787972765","https://openalex.org/W6788627230","https://openalex.org/W6838666263"],"related_works":["https://openalex.org/W2051058708","https://openalex.org/W4226466875","https://openalex.org/W2977257638","https://openalex.org/W4288095186","https://openalex.org/W3210541621","https://openalex.org/W4226299596","https://openalex.org/W4309434778","https://openalex.org/W3214759249","https://openalex.org/W2984511682","https://openalex.org/W4283773090"],"abstract_inverted_index":{"Recent":[0],"empirical":[1,58,148],"studies":[2],"observe":[3],"that":[4,78,104,131],"even":[5],"without":[6],"normalization,":[7],"a":[8,19,29,94,125],"deep":[9],"residual":[10,41],"network":[11],"can":[12,84,107],"be":[13,124],"trained":[14],"reliably.":[15],"We":[16],"call":[17],"such":[18],"structure":[20],"as":[21],"normalization-free":[22],"Residual":[23],"Networks":[24],"(N-F":[25],"ResNets),":[26],"which":[27],"add":[28],"learnable":[30],"parameter":[31,119],"<tex":[32,120],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[33,121],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\alpha$</tex>":[34,122],"to":[35,123],"control":[36],"the":[37,40,47,65,79,86,90,109,117,132],"scale":[38],"of":[39,44,69,89,134,142],"block":[42],"instead":[43],"normalization.":[45],"However,":[46],"theoretical":[48,67,155],"understanding":[49,68],"on":[50,150],"N-F":[51,70,99,105,135],"ResNets":[52,71,106,136,143],"is":[53],"still":[54],"limited":[55],"despite":[56],"their":[57],"success.":[59],"In":[60],"this":[61],"paper,":[62],"we":[63,76,102,129],"provide":[64],"first":[66],"from":[72],"two":[73],"perspectives.":[74],"Firstly,":[75],"prove":[77,103],"gradient":[80,110],"descent":[81],"(GD)":[82],"algorithm":[83],"find":[85],"global":[87],"minimum":[88],"training":[91],"loss":[92],"at":[93],"linear":[95],"rate":[96],"for":[97],"over-parameterized":[98],"ResNets.":[100],"Secondly,":[101],"avoid":[108],"exploding":[111],"or":[112],"vanishing":[113],"problem,":[114],"by":[115],"initializing":[116],"key":[118],"small":[126],"constant.":[127],"Notably,":[128],"demonstrate":[130],"gradients":[133],"are":[137],"more":[138],"stable":[139],"than":[140],"those":[141],"with":[144],"Kaiming":[145],"initialization.":[146],"Moreover,":[147],"experiments":[149],"benchmark":[151],"datasets":[152],"verify":[153],"our":[154],"results.":[156]},"counts_by_year":[],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
