{"id":"https://openalex.org/W4406457831","doi":"https://doi.org/10.1109/bigdata62323.2024.10825094","title":"Mitigating Gradient Overlap in Deep Residual Networks with Gradient Normalization for Improved Non-Convex Optimization","display_name":"Mitigating Gradient Overlap in Deep Residual Networks with Gradient Normalization for Improved Non-Convex Optimization","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406457831","doi":"https://doi.org/10.1109/bigdata62323.2024.10825094"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825094","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825094","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017226844","display_name":"Ju\u2010Young Yun","orcid":"https://orcid.org/0000-0002-5874-6345"},"institutions":[{"id":"https://openalex.org/I59553526","display_name":"Stony Brook University","ror":"https://ror.org/05qghxh33","country_code":"US","type":"education","lineage":["https://openalex.org/I59553526"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Juyoung Yun","raw_affiliation_strings":["Stony Brook University,Department of Computer Science,New York,United States,11794"],"affiliations":[{"raw_affiliation_string":"Stony Brook University,Department of Computer Science,New York,United States,11794","institution_ids":["https://openalex.org/I59553526"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5017226844"],"corresponding_institution_ids":["https://openalex.org/I59553526"],"apc_list":null,"apc_paid":null,"fwci":1.9768,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.86021285,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3831","last_page":"3837"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.7898797988891602},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6756082773208618},{"id":"https://openalex.org/keywords/proximal-gradient-methods","display_name":"Proximal Gradient Methods","score":0.6018367409706116},{"id":"https://openalex.org/keywords/convex-optimization","display_name":"Convex optimization","score":0.5205619931221008},{"id":"https://openalex.org/keywords/gradient-method","display_name":"Gradient method","score":0.4975884258747101},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47951772809028625},{"id":"https://openalex.org/keywords/regular-polygon","display_name":"Regular polygon","score":0.4790438413619995},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4214627742767334},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38597571849823},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35313689708709717},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.33785179257392883},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.12675920128822327}],"concepts":[{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.7898797988891602},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6756082773208618},{"id":"https://openalex.org/C10494615","wikidata":"https://www.wikidata.org/wiki/Q17086765","display_name":"Proximal Gradient Methods","level":4,"score":0.6018367409706116},{"id":"https://openalex.org/C157972887","wikidata":"https://www.wikidata.org/wiki/Q463359","display_name":"Convex optimization","level":3,"score":0.5205619931221008},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.4975884258747101},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47951772809028625},{"id":"https://openalex.org/C112680207","wikidata":"https://www.wikidata.org/wiki/Q714886","display_name":"Regular polygon","level":2,"score":0.4790438413619995},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4214627742767334},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38597571849823},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35313689708709717},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.33785179257392883},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.12675920128822327},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825094","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825094","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/13","score":0.6200000047683716,"display_name":"Climate action"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W569478347","https://openalex.org/W1522301498","https://openalex.org/W1583837637","https://openalex.org/W1815076433","https://openalex.org/W1980287119","https://openalex.org/W2112796928","https://openalex.org/W2146502635","https://openalex.org/W2194775991","https://openalex.org/W2420085356","https://openalex.org/W2549139847","https://openalex.org/W2607662938","https://openalex.org/W2908510526","https://openalex.org/W2911495555","https://openalex.org/W2963446712","https://openalex.org/W2964137095","https://openalex.org/W2990138404","https://openalex.org/W3016045558","https://openalex.org/W3048030262","https://openalex.org/W3110386488","https://openalex.org/W3118608800","https://openalex.org/W4250482878","https://openalex.org/W4386900849","https://openalex.org/W4388195678","https://openalex.org/W4394829074","https://openalex.org/W4403746845","https://openalex.org/W6631190155","https://openalex.org/W6637373629","https://openalex.org/W6638545294","https://openalex.org/W6638667902","https://openalex.org/W6681435938","https://openalex.org/W6695676441","https://openalex.org/W6734171162","https://openalex.org/W6747381837","https://openalex.org/W6757817989","https://openalex.org/W6780226713","https://openalex.org/W6781575362","https://openalex.org/W6787972765","https://openalex.org/W6853203810","https://openalex.org/W6857862140","https://openalex.org/W6874189266"],"related_works":["https://openalex.org/W2963086517","https://openalex.org/W2564735875","https://openalex.org/W3123504125","https://openalex.org/W3147739796","https://openalex.org/W1983212821","https://openalex.org/W3127841625","https://openalex.org/W2811217697","https://openalex.org/W2364728921","https://openalex.org/W2348618647","https://openalex.org/W2916278477"],"abstract_inverted_index":{"In":[0],"deep":[1,22,115],"learning,":[2,116],"Residual":[3],"Networks":[4],"(ResNets)":[5],"have":[6],"proven":[7],"effective":[8],"in":[9,27,48,56,109,114,135],"addressing":[10],"the":[11,17,38,42,85,94,130],"vanishing":[12],"gradient":[13,32,81,86,131],"problem,":[14],"allowing":[15],"for":[16],"successful":[18],"training":[19,106],"of":[20,97],"very":[21],"networks.":[23],"However,":[24],"skip":[25,43],"connections":[26],"ResNets":[28],"can":[29,53,128],"lead":[30],"to":[31,79],"overlap,":[33],"where":[34,117,139],"gradients":[35,89],"from":[36],"both":[37],"learned":[39],"transformation":[40],"and":[41,92],"connection":[44],"combine,":[45],"potentially":[46],"resulting":[47],"overestimated":[49],"gradients.":[50,99],"This":[51],"overestimation":[52],"cause":[54],"inefficiencies":[55],"optimization,":[57],"as":[58,76],"some":[59],"updates":[60],"may":[61],"overshoot":[62],"optimal":[63,119],"regions,":[64],"affecting":[65],"weight":[66],"updates.":[67],"To":[68],"address":[69],"this,":[70],"we":[71],"examine":[72],"Z-score":[73],"Normalization":[74],"(ZNorm)":[75],"a":[77],"technique":[78],"manage":[80],"overlap.":[82],"ZNorm":[83,104,127],"adjusts":[84],"scale,":[87],"standardizing":[88],"across":[90],"layers":[91],"reducing":[93],"negative":[95],"impact":[96],"overlapping":[98],"Our":[100],"experiments":[101],"demonstrate":[102],"that":[103,126],"improves":[105],"process,":[107],"especially":[108],"non-convex":[110],"optimization":[111],"scenarios":[112],"common":[113],"finding":[118],"solutions":[120],"is":[121,141],"challenging.":[122],"These":[123],"findings":[124],"suggest":[125],"affect":[129],"flow,":[132],"enhancing":[133],"performance":[134],"large-scale":[136],"data":[137],"processing":[138],"accuracy":[140],"critical.":[142]},"counts_by_year":[{"year":2025,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
