{"id":"https://openalex.org/W4379232471","doi":"https://doi.org/10.1137/21m1467134","title":"Efficient Global Optimization of Two-Layer ReLU Networks: Quadratic-Time Algorithms and Adversarial Training","display_name":"Efficient Global Optimization of Two-Layer ReLU Networks: Quadratic-Time Algorithms and Adversarial Training","publication_year":2023,"publication_date":"2023-06-01","ids":{"openalex":"https://openalex.org/W4379232471","doi":"https://doi.org/10.1137/21m1467134"},"language":"en","primary_location":{"id":"doi:10.1137/21m1467134","is_oa":true,"landing_page_url":"https://doi.org/10.1137/21m1467134","pdf_url":null,"source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1137/21m1467134","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011091286","display_name":"Yatong Bai","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yatong Bai","raw_affiliation_strings":["Department of Mechanical Engineering, University of California, Berkeley, Berkeley, CA 94720 USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University of California, Berkeley, Berkeley, CA 94720 USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086755692","display_name":"Tanmay Gautam","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tanmay Gautam","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of California, Berkeley, Berkeley, CA 94720 USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of California, Berkeley, Berkeley, CA 94720 USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072786751","display_name":"Somayeh Sojoudi","orcid":"https://orcid.org/0000-0001-7177-7712"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Somayeh Sojoudi","raw_affiliation_strings":["Department of Mechanical Engineering and Department of Electrical Engineering and Computer Science, University of California, Berkeley, Berkeley, CA 94720 USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering and Department of Electrical Engineering and Computer Science, University of California, Berkeley, Berkeley, CA 94720 USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4684,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.85355427,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"5","issue":"2","first_page":"446","last_page":"474"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.6616543531417847},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5980595946311951},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.5898492336273193},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5705563426017761},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5127266645431519},{"id":"https://openalex.org/keywords/quadratic-equation","display_name":"Quadratic equation","score":0.4942765235900879},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.47130286693573},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4494103491306305},{"id":"https://openalex.org/keywords/convex-optimization","display_name":"Convex optimization","score":0.4466905891895294},{"id":"https://openalex.org/keywords/quadratic-programming","display_name":"Quadratic programming","score":0.4423132538795471},{"id":"https://openalex.org/keywords/quadratic-growth","display_name":"Quadratic growth","score":0.42335814237594604},{"id":"https://openalex.org/keywords/regular-polygon","display_name":"Regular polygon","score":0.41146740317344666},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3919183313846588},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2726432681083679}],"concepts":[{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.6616543531417847},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5980595946311951},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.5898492336273193},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5705563426017761},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5127266645431519},{"id":"https://openalex.org/C129844170","wikidata":"https://www.wikidata.org/wiki/Q41299","display_name":"Quadratic equation","level":2,"score":0.4942765235900879},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.47130286693573},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4494103491306305},{"id":"https://openalex.org/C157972887","wikidata":"https://www.wikidata.org/wiki/Q463359","display_name":"Convex optimization","level":3,"score":0.4466905891895294},{"id":"https://openalex.org/C81845259","wikidata":"https://www.wikidata.org/wiki/Q290117","display_name":"Quadratic programming","level":2,"score":0.4423132538795471},{"id":"https://openalex.org/C195956108","wikidata":"https://www.wikidata.org/wiki/Q7268362","display_name":"Quadratic growth","level":2,"score":0.42335814237594604},{"id":"https://openalex.org/C112680207","wikidata":"https://www.wikidata.org/wiki/Q714886","display_name":"Regular polygon","level":2,"score":0.41146740317344666},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3919183313846588},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2726432681083679},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1137/21m1467134","is_oa":true,"landing_page_url":"https://doi.org/10.1137/21m1467134","pdf_url":null,"source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1137/21m1467134","is_oa":true,"landing_page_url":"https://doi.org/10.1137/21m1467134","pdf_url":null,"source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1498436455","https://openalex.org/W1582774210","https://openalex.org/W1981276685","https://openalex.org/W2057624533","https://openalex.org/W2075660001","https://openalex.org/W2606768527","https://openalex.org/W2963966702","https://openalex.org/W3120740533","https://openalex.org/W4291653192","https://openalex.org/W4292363360"],"related_works":["https://openalex.org/W2112847829","https://openalex.org/W4293088966","https://openalex.org/W4297407962","https://openalex.org/W1878408459","https://openalex.org/W2753327353","https://openalex.org/W4297797735","https://openalex.org/W1496670269","https://openalex.org/W2917463375","https://openalex.org/W2108152589","https://openalex.org/W2074925460"],"abstract_inverted_index":{".The":[0],"nonconvexity":[1,201],"of":[2,51,95,120,140,202],"the":[3,14,49,72,93,116,126,131,150,164,167,178,203,215],"artificial":[4],"neural":[5],"network":[6],"(ANN)":[7],"training":[8,50,74,142,205,210,222],"landscape":[9,206],"brings":[10],"optimization":[11,218],"difficulties.":[12],"While":[13],"traditional":[15],"back-propagation":[16],"stochastic":[17],"gradient":[18],"descent":[19],"algorithm":[20,112],"and":[21,37,42,80,98,130,134,149,192,223],"its":[22],"variants":[23],"are":[24,38],"effective":[25],"in":[26],"certain":[27],"cases,":[28],"they":[29],"can":[30,55,123,244],"become":[31],"stuck":[32],"at":[33],"spurious":[34],"local":[35],"minima":[36],"sensitive":[39],"to":[40,64,137,185,194,220,231,246],"initializations":[41],"hyperparameters.":[43],"Recent":[44],"work":[45],"has":[46,76],"shown":[47],"that":[48,103,227],"a":[52,59,138,156],"ReLU-activated":[53],"ANN":[54,204],"be":[56],"reformulated":[57],"as":[58],"convex":[60,73,128,141,180,190,217,221,225],"program,":[61],"bringing":[62],"hope":[63],"globally":[65,197],"optimizing":[66],"interpretable":[67],"ANNs.":[68],"However,":[69],"naively":[70],"solving":[71,163],"formulation":[75,129],"an":[77,82,195],"exponential":[78],"complexity,":[79],"even":[81],"approximation":[83,97],"heuristic":[84],"requires":[85],"cubic":[86],"time.":[87],"In":[88],"this":[89,96],"work,":[90],"we":[91],"characterize":[92],"quality":[94],"develop":[99,224],"two":[100],"efficient":[101],"algorithms":[102],"train":[104,228],"ANNs":[105,229],"with":[106,158],"global":[107,145],"convergence":[108,146],"guarantees.":[109],"The":[110,173,200],"first":[111],"is":[113,147,171,183,211],"based":[114,176],"on":[115,177,238],"alternating":[117],"direction":[118],"method":[119],"multipliers.":[121],"It":[122,187],"solve":[124],"both":[125],"exact":[127],"approximate":[132,165],"counterpart,":[133],"it":[135],"generalizes":[136],"family":[139],"formulations.":[143],"Linear":[144],"achieved,":[148],"initial":[151],"several":[152],"iterations":[153],"often":[154],"yield":[155],"solution":[157],"high":[159],"prediction":[160],"accuracy.":[161],"When":[162],"formulation,":[166],"per-iteration":[168],"time":[169],"complexity":[170],"quadratic.":[172],"second":[174],"algorithm,":[175],"\"sampled":[179],"programs\"":[181],"theory,":[182],"simpler":[184],"implement.":[186],"solves":[188],"unconstrained":[189],"formulations":[191,226],"converges":[193],"approximately":[196],"optimal":[198],"classifier.":[199],"exacerbates":[207],"when":[208],"adversarial":[209,232],"considered.":[212],"We":[213],"apply":[214],"robust":[216,230],"theory":[219],"inputs.":[233],"Our":[234],"analysis":[235],"explicitly":[236],"focuses":[237],"one-hidden-layer":[239],"fully":[240],"connected":[241],"ANNs,":[242],"but":[243],"extend":[245],"more":[247],"sophisticated":[248],"architectures.Keywordsrobust":[249],"optimizationconvex":[250],"optimizationadversarial":[251],"trainingneural":[252],"networksMSC":[253],"codes68Q2582C3249M2946N1062M45":[254]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
