{"id":"https://openalex.org/W4377013551","doi":"https://doi.org/10.1137/21m145896x","title":"Nonlinear Gradient Mappings and Stochastic Optimization: A General Framework with Applications to Heavy-Tail Noise","display_name":"Nonlinear Gradient Mappings and Stochastic Optimization: A General Framework with Applications to Heavy-Tail Noise","publication_year":2023,"publication_date":"2023-05-16","ids":{"openalex":"https://openalex.org/W4377013551","doi":"https://doi.org/10.1137/21m145896x"},"language":"en","primary_location":{"id":"doi:10.1137/21m145896x","is_oa":false,"landing_page_url":"https://doi.org/10.1137/21m145896x","pdf_url":null,"source":{"id":"https://openalex.org/S928796702","display_name":"SIAM Journal on Optimization","issn_l":"1052-6234","issn":["1052-6234","1095-7189"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070836307","display_name":"Du\u0161an Jakoveti\u0107","orcid":"https://orcid.org/0000-0003-3497-5589"},"institutions":[{"id":"https://openalex.org/I170726198","display_name":"University of Novi Sad","ror":"https://ror.org/00xa57a59","country_code":"RS","type":"education","lineage":["https://openalex.org/I170726198"]}],"countries":["RS"],"is_corresponding":true,"raw_author_name":"Dus\u0306an Jakoveti\u0107","raw_affiliation_strings":["Faculty of Sciences, Department of Mathematics and Informatics, University of Novi Sad, Novi Sad, 21000, Serbia"],"affiliations":[{"raw_affiliation_string":"Faculty of Sciences, Department of Mathematics and Informatics, University of Novi Sad, Novi Sad, 21000, Serbia","institution_ids":["https://openalex.org/I170726198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077197425","display_name":"Dragana Bajovi\u0107","orcid":"https://orcid.org/0000-0003-1783-8734"},"institutions":[{"id":"https://openalex.org/I170726198","display_name":"University of Novi Sad","ror":"https://ror.org/00xa57a59","country_code":"RS","type":"education","lineage":["https://openalex.org/I170726198"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Dragana Bajovi\u0107","raw_affiliation_strings":["Faculty of Technical Sciences, Department of Power, Electronic and Communication Engineering, University of Novi Sad, Novi Sad, 21000, Serbia"],"affiliations":[{"raw_affiliation_string":"Faculty of Technical Sciences, Department of Power, Electronic and Communication Engineering, University of Novi Sad, Novi Sad, 21000, Serbia","institution_ids":["https://openalex.org/I170726198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019965945","display_name":"Anit Kumar Sahu","orcid":"https://orcid.org/0000-0002-4083-0418"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anit Kumar Sahu","raw_affiliation_strings":["Amazon Alexa AI, Seattle, WA USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI, Seattle, WA USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077268766","display_name":"Soummya Kar","orcid":"https://orcid.org/0000-0002-8060-5581"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Soummya Kar","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, PA 15213-3890 USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, PA 15213-3890 USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021969767","display_name":"Nemanja Milo\u0161evi\u0107","orcid":"https://orcid.org/0000-0002-7598-9883"},"institutions":[{"id":"https://openalex.org/I170726198","display_name":"University of Novi Sad","ror":"https://ror.org/00xa57a59","country_code":"RS","type":"education","lineage":["https://openalex.org/I170726198"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Nemanja Milos\u0306evi\u0107","raw_affiliation_strings":["Faculty of Sciences, Department of Mathematics and Informatics, University of Novi Sad, Novi Sad, 21000, Serbia"],"affiliations":[{"raw_affiliation_string":"Faculty of Sciences, Department of Mathematics and Informatics, University of Novi Sad, Novi Sad, 21000, Serbia","institution_ids":["https://openalex.org/I170726198"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078640354","display_name":"Du\u0161an Stamenkovi\u0107","orcid":"https://orcid.org/0000-0002-0121-4591"},"institutions":[{"id":"https://openalex.org/I170726198","display_name":"University of Novi Sad","ror":"https://ror.org/00xa57a59","country_code":"RS","type":"education","lineage":["https://openalex.org/I170726198"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Dus\u0306an Stamenkovi\u0107","raw_affiliation_strings":["Faculty of Sciences, Department of Mathematics and Informatics, University of Novi Sad, Novi Sad, 21000, Serbia"],"affiliations":[{"raw_affiliation_string":"Faculty of Sciences, Department of Mathematics and Informatics, University of Novi Sad, Novi Sad, 21000, Serbia","institution_ids":["https://openalex.org/I170726198"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5070836307"],"corresponding_institution_ids":["https://openalex.org/I170726198"],"apc_list":null,"apc_paid":null,"fwci":1.049,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.80598951,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"33","issue":"2","first_page":"394","last_page":"423"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12056","display_name":"Markov Chains and Monte Carlo Methods","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.7321813702583313},{"id":"https://openalex.org/keywords/lipschitz-continuity","display_name":"Lipschitz continuity","score":0.7077360153198242},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.6726633310317993},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.5829747319221497},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.5770177841186523},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5485535264015198},{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.5098685026168823},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.5067313313484192},{"id":"https://openalex.org/keywords/convex-function","display_name":"Convex function","score":0.4906522333621979},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.47758153080940247},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4657868444919586},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.45662176609039307},{"id":"https://openalex.org/keywords/regular-polygon","display_name":"Regular polygon","score":0.3193601369857788},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.27552667260169983},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.17984700202941895},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.10965847969055176}],"concepts":[{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.7321813702583313},{"id":"https://openalex.org/C22324862","wikidata":"https://www.wikidata.org/wiki/Q652707","display_name":"Lipschitz continuity","level":2,"score":0.7077360153198242},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.6726633310317993},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.5829747319221497},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.5770177841186523},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5485535264015198},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.5098685026168823},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.5067313313484192},{"id":"https://openalex.org/C145446738","wikidata":"https://www.wikidata.org/wiki/Q319913","display_name":"Convex function","level":3,"score":0.4906522333621979},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.47758153080940247},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4657868444919586},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.45662176609039307},{"id":"https://openalex.org/C112680207","wikidata":"https://www.wikidata.org/wiki/Q714886","display_name":"Regular polygon","level":2,"score":0.3193601369857788},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.27552667260169983},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.17984700202941895},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.10965847969055176},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1137/21m145896x","is_oa":false,"landing_page_url":"https://doi.org/10.1137/21m145896x","pdf_url":null,"source":{"id":"https://openalex.org/S928796702","display_name":"SIAM Journal on Optimization","issn_l":"1052-6234","issn":["1052-6234","1095-7189"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Optimization","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1816425981","display_name":null,"funder_award_id":"957337","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"}],"funders":[{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1498711961","https://openalex.org/W1991083751","https://openalex.org/W1992208280","https://openalex.org/W2045744861","https://openalex.org/W2080335539","https://openalex.org/W2083657634","https://openalex.org/W2108306501","https://openalex.org/W2114346036","https://openalex.org/W2153368486","https://openalex.org/W2153635508","https://openalex.org/W2168909589","https://openalex.org/W2612690371","https://openalex.org/W2937268935","https://openalex.org/W2962952793","https://openalex.org/W2963433607","https://openalex.org/W2972939971","https://openalex.org/W3029427046","https://openalex.org/W3123607434","https://openalex.org/W4252698487"],"related_works":["https://openalex.org/W2964170259","https://openalex.org/W3002546633","https://openalex.org/W4206119629","https://openalex.org/W2765682467","https://openalex.org/W4382937879","https://openalex.org/W2400034325","https://openalex.org/W4292651679","https://openalex.org/W2499557797","https://openalex.org/W4387635768","https://openalex.org/W2985014567"],"abstract_inverted_index":{".We":[0],"introduce":[1],"a":[2,54,77,136,151],"general":[3,65,143,188],"framework":[4,22,185,202],"for":[5,11,44,76,83,127,142],"nonlinear":[6,99,160],"stochastic":[7],"gradient":[8,15,69,85,215],"descent":[9],"(SGD)":[10],"the":[12,45,68,84,98,107,128,132,159,169,176],"scenarios":[13],"when":[14],"noise":[16,86,130],"exhibits":[17],"heavy":[18],"tails.":[19],"The":[20],"proposed":[21],"subsumes":[23],"several":[24,197],"popular":[25],"nonlinearity":[26,40,78],"choices,":[27],"like":[28],"clipped,":[29],"normalized,":[30],"signed,":[31],"or":[32,105],"quantized":[33],"gradient,":[34],"but":[35],"we":[36,73,156],"also":[37],"consider":[38],"novel":[39],"choices.":[41],"We":[42],"establish":[43],"considered":[46],"class":[47,152],"of":[48,93,153,192],"methods":[49],"strong":[50],"convergence":[51,171],"guarantees":[52],"assuming":[53],"strongly":[55],"convex":[56],"cost":[57,109],"function":[58],"with":[59,79,138,205,211],"Lipschitz":[60],"continuous":[61],"gradients":[62],"under":[63,194],"very":[64],"assumptions":[66],"on":[67,208],"noise.":[70],"Most":[71],"notably,":[72],"show":[74,157,181],"that,":[75,182],"bounded":[80],"outputs":[81],"and":[82,150,173],"that":[87,145,158],"may":[88],"not":[89],"have":[90],"finite":[91],"moments":[92],"order":[94],"greater":[95],"than":[96,189],"one,":[97],"SGD's":[100],"mean":[101],"squared":[102],"error":[103],"(MSE),":[104],"equivalently,":[106],"expected":[108],"function's":[110],"optimality":[111],"gap,":[112],"converges":[113],"to":[114],"zero":[115],"at":[116],"rate":[117,167],"\\(O(1/t^\\zeta":[118],")\\)":[119],",":[120],"\\(\\zeta":[121],"\\in":[122],"(0,1)\\)":[123],".":[124],"In":[125],"contrast,":[126],"same":[129],"setting,":[131],"linear":[133],"SGD":[134,161,193],"generates":[135],"sequence":[137],"unbounded":[139],"variances.":[140],"Furthermore,":[141],"nonlinearities":[144,199],"can":[146],"be":[147],"decoupled":[148],"componentwise":[149],"joint":[154],"nonlinearities,":[155],"asymptotically":[162],"(locally)":[163],"achieves":[164],"an":[165],"\\(O(1/t)\\)":[166],"in":[168],"weak":[170],"sense":[172],"explicitly":[174],"quantify":[175],"corresponding":[177],"asymptotic":[178],"variance.":[179],"Experiments":[180],"while":[183],"our":[184,201],"is":[186],"more":[187],"existing":[190],"studies":[191],"heavy-tail":[195,212],"noise,":[196],"easy-to-implement":[198],"from":[200],"are":[203],"competitive":[204],"state-of-the-art":[206],"alternatives":[207],"real":[209],"datasets":[210],"noises.Keywordsstochastic":[213],"optimizationstochastic":[214],"descentnonlinear":[216],"mappingheavy-tail":[217],"noiseconvergence":[218],"ratemean":[219],"square":[220],"analysisasymptotic":[221],"normalitystochastic":[222],"approximationMSC":[223],"codes90C1590C2565K0562L2068T05":[224]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2023-05-19T00:00:00"}
