{"id":"https://openalex.org/W7092298369","doi":"https://doi.org/10.48550/arxiv.2510.14074","title":"Exact Dynamics of Multi-class Stochastic Gradient Descent","display_name":"Exact Dynamics of Multi-class Stochastic Gradient Descent","publication_year":2025,"publication_date":"2025-10-15","ids":{"openalex":"https://openalex.org/W7092298369","doi":"https://doi.org/10.48550/arxiv.2510.14074"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2510.14074","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.14074","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2510.14074","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Collins-Woodfin, Elizabeth","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Collins-Woodfin, Elizabeth","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Seroussi, Inbar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seroussi, Inbar","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.891700029373169,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.891700029373169,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.04230000078678131,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.008700000122189522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/covariance","display_name":"Covariance","score":0.6988000273704529},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.6233999729156494},{"id":"https://openalex.org/keywords/isotropy","display_name":"Isotropy","score":0.4675000011920929},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.4244000017642975},{"id":"https://openalex.org/keywords/covariance-matrix","display_name":"Covariance matrix","score":0.398499995470047},{"id":"https://openalex.org/keywords/covariance-function","display_name":"Covariance function","score":0.38920000195503235},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.3862000107765198},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.383899986743927}],"concepts":[{"id":"https://openalex.org/C178650346","wikidata":"https://www.wikidata.org/wiki/Q201984","display_name":"Covariance","level":2,"score":0.6988000273704529},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6801999807357788},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.6233999729156494},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.588699996471405},{"id":"https://openalex.org/C184050105","wikidata":"https://www.wikidata.org/wiki/Q273163","display_name":"Isotropy","level":2,"score":0.4675000011920929},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.4244000017642975},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.40059998631477356},{"id":"https://openalex.org/C185142706","wikidata":"https://www.wikidata.org/wiki/Q1134404","display_name":"Covariance matrix","level":2,"score":0.398499995470047},{"id":"https://openalex.org/C137250428","wikidata":"https://www.wikidata.org/wiki/Q5178897","display_name":"Covariance function","level":3,"score":0.38920000195503235},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.3862000107765198},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.383899986743927},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.37470000982284546},{"id":"https://openalex.org/C65778772","wikidata":"https://www.wikidata.org/wiki/Q12345341","display_name":"Asymptotic distribution","level":3,"score":0.3458999991416931},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C205147927","wikidata":"https://www.wikidata.org/wiki/Q752718","display_name":"Asymptotic analysis","level":2,"score":0.31349998712539673},{"id":"https://openalex.org/C180877172","wikidata":"https://www.wikidata.org/wiki/Q5401390","display_name":"Estimation of covariance matrices","level":3,"score":0.3122999966144562},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.3077999949455261},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.29170000553131104},{"id":"https://openalex.org/C85725439","wikidata":"https://www.wikidata.org/wiki/Q466686","display_name":"Anisotropy","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.2720000147819519},{"id":"https://openalex.org/C64812099","wikidata":"https://www.wikidata.org/wiki/Q176604","display_name":"Random matrix","level":3,"score":0.262800008058548},{"id":"https://openalex.org/C156778621","wikidata":"https://www.wikidata.org/wiki/Q1365748","display_name":"Spectrum (functional analysis)","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C135692309","wikidata":"https://www.wikidata.org/wiki/Q111124","display_name":"Square (algebra)","level":2,"score":0.25760000944137573},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2510.14074","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.14074","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2510.14074","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.14074","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,34,69,92,120,150],"develop":[1],"a":[2,13,39,61,65,82,130,147,155],"framework":[3],"for":[4,38,164],"analyzing":[5],"the":[6,45,49,52,55,71,86,97,100,104,107,110,138,165,169,185,192,212,217,220],"training":[7],"and":[8,51,81,116,141,168,207],"learning":[9],"rate":[10],"dynamics":[11,77],"on":[12],"variety":[14],"of":[15,42,44,60,67,74,90,99,103,106,112,133,184,197,216],"high-":[16],"dimensional":[17],"optimization":[18],"problems":[19,111],"trained":[20],"using":[21],"one-pass":[22],"stochastic":[23],"gradient":[24],"descent":[25],"(SGD)":[26],"with":[27,54,85,129,144,172,182],"data":[28,80,108,126],"generated":[29],"from":[30],"multiple":[31],"anisotropic":[32,101],"classes.":[33,91],"give":[35],"exact":[36,213],"expressions":[37],"large":[40,83,131,174],"class":[41,186],"functions":[43],"limiting":[46],"dynamics,":[47],"including":[48],"risk":[50],"overlap":[53],"true":[56],"signal,":[57],"in":[58,95,109,219],"terms":[59],"deterministic":[62],"solution":[63],"to":[64,78,178],"system":[66],"ODEs.":[68],"extend":[70],"existing":[72],"theory":[73],"high-dimensional":[75,221],"SGD":[76,176],"Gaussian-mixture":[79],"(growing":[84],"parameter":[87],"size)":[88],"number":[89],"then":[93],"investigate":[94],"detail":[96],"effect":[98],"structure":[102],"covariance":[105,127,142],"binary":[113],"logistic":[114],"regression":[115],"least":[117],"square":[118],"loss.":[119],"study":[121],"three":[122],"cases:":[123],"isotropic":[124],"covariances,":[125],"matrices":[128,143],"fraction":[132],"zero":[134],"eigenvalues":[135],"(denoted":[136],"as":[137],"zero-one":[139,166],"model),":[140],"spectra":[145],"following":[146],"power-law":[148,170],"distribution.":[149],"show":[151,211],"that":[152,188],"there":[153],"exists":[154],"structural":[156],"phase":[157],"transition.":[158],"In":[159],"particular,":[160],"we":[161],"demonstrate":[162],"that,":[163],"model":[167,171],"sufficiently":[173],"power,":[175],"tends":[177],"align":[179],"more":[180],"closely":[181],"values":[183],"mean":[187],"are":[189],"projected":[190],"onto":[191],"\"clean":[193],"directions\"":[194],"(i.e.,":[195],"directions":[196],"smaller":[198],"variance).":[199],"This":[200],"is":[201],"supported":[202],"by":[203],"both":[204],"numerical":[205],"simulations":[206],"analytical":[208],"studies,":[209],"which":[210],"asymptotic":[214],"behavior":[215],"loss":[218],"limit.":[222]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-18T00:00:00"}
