{"id":"https://openalex.org/W3036100489","doi":"https://doi.org/10.1137/18m1231559","title":"MultiComposite Nonconvex Optimization for Training Deep Neural Networks","display_name":"MultiComposite Nonconvex Optimization for Training Deep Neural Networks","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3036100489","doi":"https://doi.org/10.1137/18m1231559","mag":"3036100489"},"language":"en","primary_location":{"id":"doi:10.1137/18m1231559","is_oa":false,"landing_page_url":"https://doi.org/10.1137/18m1231559","pdf_url":null,"source":{"id":"https://openalex.org/S928796702","display_name":"SIAM Journal on Optimization","issn_l":"1052-6234","issn":["1052-6234","1095-7189"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025630034","display_name":"Ying Cui","orcid":"https://orcid.org/0000-0003-4173-5647"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ying Cui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108167001","display_name":"Ziyu He","orcid":"https://orcid.org/0000-0002-7611-312X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ziyu He","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5002789695","display_name":"Jong\u2010Shi Pang","orcid":"https://orcid.org/0000-0002-2224-6787"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jong-Shi Pang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5025630034"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7233,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.87594335,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"30","issue":"2","first_page":"1693","last_page":"1723"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5983192920684814},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5553610324859619},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5267909169197083},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5168805122375488},{"id":"https://openalex.org/keywords/piecewise","display_name":"Piecewise","score":0.43984344601631165},{"id":"https://openalex.org/keywords/matlab","display_name":"MATLAB","score":0.4244527816772461},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.41688984632492065},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.41359326243400574},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3379763960838318},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1857922077178955}],"concepts":[{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5983192920684814},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5553610324859619},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5267909169197083},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5168805122375488},{"id":"https://openalex.org/C164660894","wikidata":"https://www.wikidata.org/wiki/Q2037833","display_name":"Piecewise","level":2,"score":0.43984344601631165},{"id":"https://openalex.org/C2780365114","wikidata":"https://www.wikidata.org/wiki/Q169478","display_name":"MATLAB","level":2,"score":0.4244527816772461},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.41688984632492065},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.41359326243400574},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3379763960838318},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1857922077178955},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1137/18m1231559","is_oa":false,"landing_page_url":"https://doi.org/10.1137/18m1231559","pdf_url":null,"source":{"id":"https://openalex.org/S928796702","display_name":"SIAM Journal on Optimization","issn_l":"1052-6234","issn":["1052-6234","1095-7189"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Optimization","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7300000190734863,"id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1700293460","display_name":null,"funder_award_id":"IIS-1632971","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G449920173","display_name":null,"funder_award_id":"FA9550-18-1-0382","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1994616650","https://openalex.org/W2013595918","https://openalex.org/W2044830700","https://openalex.org/W2076063813","https://openalex.org/W2082746403","https://openalex.org/W2136922672","https://openalex.org/W2137983211","https://openalex.org/W2225235077","https://openalex.org/W2919115771","https://openalex.org/W2963190258","https://openalex.org/W2963433607","https://openalex.org/W2963470657","https://openalex.org/W2963562721","https://openalex.org/W2964210434","https://openalex.org/W2987707205","https://openalex.org/W2988569866","https://openalex.org/W3125537303","https://openalex.org/W4234552385"],"related_works":["https://openalex.org/W1017828488","https://openalex.org/W2098894960","https://openalex.org/W2086369498","https://openalex.org/W2079988241","https://openalex.org/W4249194825","https://openalex.org/W4232993046","https://openalex.org/W2386387936","https://openalex.org/W2079352224","https://openalex.org/W2040937631","https://openalex.org/W4300665566"],"abstract_inverted_index":{"We":[0],"present":[1],"in":[2,112,168],"this":[3],"paper":[4],"a":[5,15,29,48,58,109,118,196,211],"novel":[6],"deterministic":[7,173],"algorithmic":[8],"framework":[9,204],"that":[10,46,132],"enables":[11],"the":[12,20,40,87,91,102,122,141,164,178,182,188,200,203,218],"computation":[13],"of":[14,19,51,68,77,93,121,148,177,181,202,214],"directional":[16],"stationary":[17,52],"solution":[18,53],"empirical":[21],"deep":[22,60],"neural":[23,61],"network":[24],"training":[25,72,215],"problem":[26,32],"formulated":[27],"as":[28],"multicomposite":[30],"optimization":[31,189],"with":[33,99,185,210],"coupled":[34],"nonconvexity":[35],"and":[36,71,82,101,143],"nondifferentiability.":[37],"This":[38],"is":[39,54],"first":[41],"time":[42],"to":[43,138,152,187],"our":[44,171],"knowledge":[45],"such":[47],"sharp":[49],"kind":[50],"provably":[55],"computable":[56],"for":[57,64,108,146,205],"nonsmooth":[59],"network.":[62],"Allowing":[63],"arbitrary":[65,75,83,149],"finite":[66],"numbers":[67],"input":[69],"samples":[70,216],"layers,":[73],"an":[74,113],"number":[76,213],"neurons":[78],"within":[79],"each":[80,107],"layer,":[81],"piecewise":[84],"activation":[85],"functions,":[86],"proposed":[88],"approach":[89],"combines":[90],"methods":[92],"exact":[94],"penalization,":[95],"majorization-minimization,":[96],"gradient":[97],"projection":[98],"enhancements,":[100],"dual":[103],"semismooth":[104,123],"Newton":[105,124],"method,":[106],"particular":[110],"purpose":[111],"overall":[114],"computational":[115,142],"scheme.":[116],"While":[117],"routine":[119],"implementation":[120,136,198],"method":[125],"would":[126],"be":[127],"computationally":[128],"expensive,":[129],"we":[130],"show":[131],"careful":[133],"linear":[134],"algebraic":[135],"helps":[137],"greatly":[139],"reduce":[140],"storage":[144],"costs":[145],"problems":[147,190],"dimensions.":[150],"Contrary":[151],"existing":[153],"stochastic":[154],"approaches":[155],"which":[156],"provide":[157],"at":[158],"best":[159],"very":[160],"weak":[161],"guarantees":[162],"on":[163],"computed":[165,183],"solutions":[166,184],"obtained":[167],"practical":[169],"implementation,":[170],"rigorous":[172],"treatment":[174],"provides":[175],"guarantee":[176],"stationarity":[179],"properties":[180],"reference":[186],"being":[191],"solved.":[192],"Numerical":[193],"results":[194],"from":[195],"MATLAB":[197],"demonstrate":[199],"effectiveness":[201],"solving":[206],"reasonably":[207],"sized":[208],"networks":[209],"modest":[212],"(in":[217],"low":[219],"thousands).":[220]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
