{"id":"https://openalex.org/W3202046707","doi":"https://doi.org/10.1137/21m1452512","title":"slimTrain---A Stochastic Approximation Method for Training Separable Deep Neural Networks","display_name":"slimTrain---A Stochastic Approximation Method for Training Separable Deep Neural Networks","publication_year":2022,"publication_date":"2022-08-01","ids":{"openalex":"https://openalex.org/W3202046707","doi":"https://doi.org/10.1137/21m1452512","mag":"3202046707"},"language":"en","primary_location":{"id":"doi:10.1137/21m1452512","is_oa":false,"landing_page_url":"https://doi.org/10.1137/21m1452512","pdf_url":null,"source":{"id":"https://openalex.org/S165512578","display_name":"SIAM Journal on Scientific Computing","issn_l":"1064-8275","issn":["1064-8275","1095-7197"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Scientific Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090161771","display_name":"Elizabeth Newman","orcid":"https://orcid.org/0000-0002-6309-7706"},"institutions":[{"id":"https://openalex.org/I150468666","display_name":"Emory University","ror":"https://ror.org/03czfpz43","country_code":"US","type":"education","lineage":["https://openalex.org/I150468666"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Elizabeth Newman","raw_affiliation_strings":["Emory University"],"affiliations":[{"raw_affiliation_string":"Emory University","institution_ids":["https://openalex.org/I150468666"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004996901","display_name":"Julianne Chung","orcid":"https://orcid.org/0000-0002-6760-4736"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Julianne Chung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054213578","display_name":"Matthias Chung","orcid":"https://orcid.org/0000-0001-7822-4539"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matthias Chung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5076591073","display_name":"Lars Ruthotto","orcid":"https://orcid.org/0000-0003-0803-3299"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lars Ruthotto","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5090161771"],"corresponding_institution_ids":["https://openalex.org/I150468666"],"apc_list":null,"apc_paid":null,"fwci":0.9716,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.78745605,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"44","issue":"4","first_page":"A2322","last_page":"A2348"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.8218939304351807},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6528723239898682},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.6121975779533386},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5528789758682251},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5269212126731873},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48617023229599},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.47881394624710083},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4652363359928131},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.45912373065948486},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.27633586525917053}],"concepts":[{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.8218939304351807},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6528723239898682},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.6121975779533386},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5528789758682251},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5269212126731873},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48617023229599},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.47881394624710083},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4652363359928131},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.45912373065948486},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27633586525917053}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1137/21m1452512","is_oa":false,"landing_page_url":"https://doi.org/10.1137/21m1452512","pdf_url":null,"source":{"id":"https://openalex.org/S165512578","display_name":"SIAM Journal on Scientific Computing","issn_l":"1064-8275","issn":["1064-8275","1095-7197"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Scientific Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4066206688","display_name":null,"funder_award_id":"DMS-1723005","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4066358753","display_name":null,"funder_award_id":"20-023231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G5068271573","display_name":null,"funder_award_id":"DMS-1654175","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5161266350","display_name":null,"funder_award_id":"20RT0287","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G6854963444","display_name":null,"funder_award_id":"DMS-1751636","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1528703258","https://openalex.org/W1594234351","https://openalex.org/W1850361076","https://openalex.org/W1901129140","https://openalex.org/W1979893394","https://openalex.org/W1990381576","https://openalex.org/W1994530392","https://openalex.org/W1994616650","https://openalex.org/W2000769684","https://openalex.org/W2018089423","https://openalex.org/W2023901033","https://openalex.org/W2053277578","https://openalex.org/W2065288681","https://openalex.org/W2090636411","https://openalex.org/W2095984592","https://openalex.org/W2096840748","https://openalex.org/W2097897435","https://openalex.org/W2102486516","https://openalex.org/W2103496339","https://openalex.org/W2114424556","https://openalex.org/W2117686388","https://openalex.org/W2137645797","https://openalex.org/W2146502635","https://openalex.org/W2154579312","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2295492468","https://openalex.org/W2480907039","https://openalex.org/W2594594773","https://openalex.org/W2600297185","https://openalex.org/W2604272474","https://openalex.org/W2612252966","https://openalex.org/W2612972698","https://openalex.org/W2784733489","https://openalex.org/W2786232134","https://openalex.org/W2898721209","https://openalex.org/W2899283552","https://openalex.org/W2908541468","https://openalex.org/W2911495555","https://openalex.org/W2946462711","https://openalex.org/W2950804467","https://openalex.org/W2963397933","https://openalex.org/W2963433607","https://openalex.org/W2963755523","https://openalex.org/W2963941964","https://openalex.org/W2964303576","https://openalex.org/W2970971581","https://openalex.org/W2974916071","https://openalex.org/W2981103608","https://openalex.org/W2995576031","https://openalex.org/W3004658186","https://openalex.org/W3007809852","https://openalex.org/W3030916542","https://openalex.org/W3031689935","https://openalex.org/W3036680831","https://openalex.org/W3044941140","https://openalex.org/W3098011980","https://openalex.org/W3108655421","https://openalex.org/W3115910238","https://openalex.org/W3178968719","https://openalex.org/W3198749684"],"related_works":["https://openalex.org/W2140186469","https://openalex.org/W4280563792","https://openalex.org/W4318719684","https://openalex.org/W4318559728","https://openalex.org/W3183136280","https://openalex.org/W2775233965","https://openalex.org/W4311551265","https://openalex.org/W4360995913","https://openalex.org/W4381707502","https://openalex.org/W2609418570"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"(DNNs)":[3],"have":[4],"shown":[5],"their":[6],"success":[7],"as":[8,28],"high-dimensional":[9],"function":[10,217],"approximators":[11],"in":[12,21,112,157,221],"many":[13,113,158],"applications;":[14],"however,":[15],"training":[16,24,83,132,231,244,282],"DNNs":[17,48,100,133],"can":[18,86,95,264],"be":[19,87,265,287],"challenging":[20],"general.":[22],"DNN":[23,121,159,166,230,243],"is":[25,151,260],"commonly":[26],"phrased":[27],"a":[29,50,79,127,168,174,203],"stochastic":[30,128],"optimization":[31,129],"problem":[32],"whose":[33],"challenges":[34,119],"include":[35],"nonconvexity,":[36],"nonsmoothness,":[37],"insufficient":[38],"regularization,":[39],"and":[40,61,89,106,142,206,224,238,262],"complicated":[41],"data":[42],"distributions.":[43],"Hence,":[44],"the":[45,65,97,118,138,154,165,195,209,234,240,246,269,279],"performance":[46],"of":[47,67,81,99,120,140,149,242,271,281],"on":[49,55,73,91,253],"given":[51],"task":[52],"depends":[53],"crucially":[54],"tuning":[56],"hyperparameters,":[57],"especially":[58],"learning":[59,204],"rates":[60],"regularization":[62,210],"parameters.":[63],"In":[64,212],"absence":[66],"theoretical":[68],"guidelines":[69],"or":[70,278],"prior":[71],"experience":[72],"similar":[74],"tasks,":[75],"this":[76],"requires":[77],"solving":[78,187],"series":[80],"repeated":[82],"problems":[84,102,283],"which":[85],"time-consuming":[88],"demanding":[90],"computational":[92,256],"resources.":[93],"This":[94,177],"limit":[96],"applicability":[98],"to":[101,137,152,181,245,274,286,289],"with":[103,134,233],"nonstandard,":[104],"complex,":[105],"scarce":[107],"datasets,":[108],"e.g.,":[109],"those":[110],"arising":[111,220],"scientific":[114],"applications.":[115],"To":[116],"remedy":[117],"training,":[122],"we":[123,163],"propose":[124],"\\tt":[125,198],"slimTrain,":[126],"method":[130,251],"for":[131,186,194],"reduced":[135],"sensitivity":[136,241],"choice":[139],"hyperparameters":[141],"fast":[143],"initial":[144,276],"convergence.":[145],"The":[146],"central":[147],"idea":[148],"slimTrain":[150,199,227],"exploit":[153],"separability":[155,178],"inherent":[156],"architectures;":[160],"that":[161,284],"is,":[162],"separate":[164],"into":[167],"nonlinear":[169],"feature":[170],"extractor":[171],"followed":[172],"by":[173,267],"linear":[175,196],"model.":[176],"allows":[179],"us":[180],"leverage":[182],"recent":[183],"advances":[184],"made":[185],"large-scale,":[188],"linear,":[189],"ill-posed":[190],"inverse":[191],"problems.":[192],"Crucially,":[193],"weights,":[197],"does":[200],"not":[201],"require":[202],"rate":[205],"automatically":[207],"adapts":[208],"parameter.":[211],"our":[213,250],"numerical":[214],"experiments":[215],"using":[216],"approximation":[218],"tasks":[219],"surrogate":[222],"modeling":[223],"dimensionality":[225],"reduction,":[226],"outperforms":[228],"existing":[229],"methods":[232],"recommended":[235],"hyperparameter":[236],"settings":[237],"reduces":[239],"remaining":[247],"hyperparameters.":[248,292],"Since":[249],"operates":[252],"mini-batches,":[254],"its":[255],"overhead":[257],"per":[258],"iteration":[259],"modest":[261],"savings":[263],"realized":[266],"reducing":[268],"number":[270,280],"iterations":[272],"(due":[273],"quicker":[275],"convergence)":[277],"need":[285],"solved":[288],"identify":[290],"effective":[291]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
