{"id":"https://openalex.org/W3044941140","doi":"https://doi.org/10.1137/20m1359511","title":"Train Like a (Var)Pro: Efficient Training of Neural Networks with Variable Projection","display_name":"Train Like a (Var)Pro: Efficient Training of Neural Networks with Variable Projection","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3044941140","doi":"https://doi.org/10.1137/20m1359511","mag":"3044941140"},"language":"en","primary_location":{"id":"doi:10.1137/20m1359511","is_oa":true,"landing_page_url":"https://doi.org/10.1137/20m1359511","pdf_url":"https://epubs.siam.org/doi/pdf/10.1137/20M1359511","source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://epubs.siam.org/doi/pdf/10.1137/20M1359511","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090161771","display_name":"Elizabeth Newman","orcid":"https://orcid.org/0000-0002-6309-7706"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Elizabeth Newman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076591073","display_name":"Lars Ruthotto","orcid":"https://orcid.org/0000-0003-0803-3299"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lars Ruthotto","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0003-0803-3299","affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009518158","display_name":"Joseph Hart","orcid":"https://orcid.org/0000-0001-6932-7894"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joseph Hart","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5110187827","display_name":"Bart van Bloemen Waanders","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bart van Bloemen Waanders","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5090161771"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3938,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.57369582,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"3","issue":"4","first_page":"1041","last_page":"1066"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11245","display_name":"Advanced Numerical Analysis Techniques","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6448185443878174},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.5361419320106506},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5195425152778625},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.5128195881843567},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45144152641296387},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4245862662792206},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35303473472595215},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33685237169265747},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24102085828781128}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6448185443878174},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.5361419320106506},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5195425152778625},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.5128195881843567},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45144152641296387},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4245862662792206},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35303473472595215},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33685237169265747},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24102085828781128}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1137/20m1359511","is_oa":true,"landing_page_url":"https://doi.org/10.1137/20m1359511","pdf_url":"https://epubs.siam.org/doi/pdf/10.1137/20M1359511","source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2007.13171","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2007.13171","pdf_url":"https://arxiv.org/pdf/2007.13171","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3044941140","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2007.13171.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:osti.gov:1834344","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1834344","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},{"id":"doi:10.48550/arxiv.2007.13171","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2007.13171","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1137/20m1359511","is_oa":true,"landing_page_url":"https://doi.org/10.1137/20m1359511","pdf_url":"https://epubs.siam.org/doi/pdf/10.1137/20M1359511","source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2482313616","display_name":null,"funder_award_id":"2003941","funder_id":"https://openalex.org/F4320338291","funder_display_name":"Sandia National Laboratories"},{"id":"https://openalex.org/G7385372765","display_name":null,"funder_award_id":"DMS 1751636","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338291","display_name":"Sandia National Laboratories","ror":"https://ror.org/01apwpt12"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3044941140.pdf","grobid_xml":"https://content.openalex.org/works/W3044941140.grobid-xml"},"referenced_works_count":59,"referenced_works":["https://openalex.org/W593406250","https://openalex.org/W1522301498","https://openalex.org/W1832379062","https://openalex.org/W1850361076","https://openalex.org/W1895355475","https://openalex.org/W1901129140","https://openalex.org/W1970563617","https://openalex.org/W1977271024","https://openalex.org/W1982784722","https://openalex.org/W1983916623","https://openalex.org/W1992208280","https://openalex.org/W1994616650","https://openalex.org/W2009354787","https://openalex.org/W2015503561","https://openalex.org/W2018089423","https://openalex.org/W2038105662","https://openalex.org/W2065288681","https://openalex.org/W2070027705","https://openalex.org/W2090636411","https://openalex.org/W2103496339","https://openalex.org/W2123224804","https://openalex.org/W2138261166","https://openalex.org/W2140237403","https://openalex.org/W2142113627","https://openalex.org/W2154579312","https://openalex.org/W2160815625","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2282649489","https://openalex.org/W2404865717","https://openalex.org/W2528626821","https://openalex.org/W2600297185","https://openalex.org/W2612252966","https://openalex.org/W2739833408","https://openalex.org/W2751781018","https://openalex.org/W2760972773","https://openalex.org/W2766207105","https://openalex.org/W2786232134","https://openalex.org/W2803629276","https://openalex.org/W2898721209","https://openalex.org/W2899283552","https://openalex.org/W2914331134","https://openalex.org/W2930930439","https://openalex.org/W2933175448","https://openalex.org/W2946462711","https://openalex.org/W2963397933","https://openalex.org/W2963433607","https://openalex.org/W2974916071","https://openalex.org/W2994751851","https://openalex.org/W3029645440","https://openalex.org/W3030102576","https://openalex.org/W3030916542","https://openalex.org/W3031420959","https://openalex.org/W3031689935","https://openalex.org/W3036680831","https://openalex.org/W3098011980","https://openalex.org/W3115910238","https://openalex.org/W3118608800","https://openalex.org/W3217247658"],"related_works":["https://openalex.org/W3134327638","https://openalex.org/W2765904043","https://openalex.org/W2810268014","https://openalex.org/W3198331841","https://openalex.org/W3173442189","https://openalex.org/W2889827408","https://openalex.org/W2344347982","https://openalex.org/W2996176259","https://openalex.org/W2297117576","https://openalex.org/W2809055160","https://openalex.org/W3095583255","https://openalex.org/W2540616960","https://openalex.org/W2966305713","https://openalex.org/W2883216837","https://openalex.org/W2947035977","https://openalex.org/W2903190332","https://openalex.org/W3017466908","https://openalex.org/W3138674686","https://openalex.org/W2948894819","https://openalex.org/W3108973796"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"(DNNs)":[3],"have":[4],"achieved":[5],"state-of-the-art":[6,199],"performance":[7],"across":[8],"a":[9,136,185],"variety":[10],"of":[11,25,60,66,76,80,112,119,132,158],"traditional":[12],"machine":[13],"learning":[14],"tasks,":[15],"e.g.,":[16,40],"speech":[17],"recognition,":[18],"image":[19],"classification,":[20],"and":[21,47,94,116,210,233],"segmentation.":[22],"The":[23],"ability":[24],"DNNs":[26],"to":[27,41,48,108,162,179,244],"efficiently":[28,218],"approximate":[29],"high-dimensional":[30],"functions":[31,170],"has":[32],"also":[33],"motivated":[34],"their":[35],"use":[36,131],"in":[37,64,172,197,234],"scientific":[38],"applications,":[39],"solve":[42,122],"partial":[43],"differential":[44],"equations":[45],"(PDE)":[46],"generate":[49],"surrogate":[50,207],"models.":[51],"In":[52,201],"this":[53,101],"paper,":[54],"we":[55,128],"consider":[56],"the":[57,67,73,78,81,89,109,123,130,149,156,159,214],"supervised":[58],"training":[59,181],"DNNs,":[61],"which":[62,194],"arises":[63],"many":[65,198],"above":[68],"applications.":[69],"We":[70],"focus":[71],"on":[72],"central":[74],"problem":[75,103,125,216],"optimizing":[77],"weights":[79],"given":[82],"DNN":[83,186],"such":[84],"that":[85,154,183,230],"it":[86],"accurately":[87],"approximates":[88],"relation":[90],"between":[91],"observed":[92],"input":[93],"target":[95],"data.":[96],"Devising":[97],"effective":[98],"solvers":[99],"for":[100,140],"optimization":[102,124,215],"is":[104,148,190,195],"notoriously":[105],"challenging":[106],"due":[107],"large":[110],"number":[111],"weights,":[113],"non-convexity,":[114],"data-sparsity,":[115],"non-trivial":[117],"choice":[118],"hyperparameters.":[120],"To":[121],"more":[126,217],"efficiently,":[127],"propose":[129],"variable":[133],"projection":[134],"(VarPro),":[135],"method":[137,152],"originally":[138],"designed":[139],"separable":[141],"nonlinear":[142],"least-squares":[143],"problems.":[144],"Our":[145],"main":[146],"contribution":[147],"Gauss-Newton":[150],"VarPro":[151,160],"(GNvpro)":[153],"extends":[155],"reach":[157],"idea":[161],"non-quadratic":[163],"objective":[164],"functions,":[165],"most":[166],"notably,":[167],"cross-entropy":[168],"loss":[169],"arising":[171],"classification.":[173],"These":[174],"extensions":[175],"make":[176],"GNvpro":[177,212,227],"applicable":[178],"all":[180,235],"problems":[182],"involve":[184],"whose":[187],"last":[188],"layer":[189],"an":[191],"affine":[192],"mapping,":[193],"common":[196],"architectures.":[200],"our":[202],"four":[203],"numerical":[204],"experiments":[205],"from":[206],"modeling,":[208],"segmentation,":[209],"classification":[211],"solves":[213],"than":[219,240],"commonly-used":[220],"stochastic":[221],"gradient":[222],"descent":[223],"(SGD)":[224],"schemes.":[225],"Also,":[226],"finds":[228],"solutions":[229],"generalize":[231],"well,":[232],"but":[236],"one":[237],"example":[238],"better":[239],"well-tuned":[241],"SGD":[242],"methods,":[243],"unseen":[245],"data":[246],"points.":[247]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
