{"id":"https://openalex.org/W2609037894","doi":"https://doi.org/10.1145/3055399.3055464","title":"Finding approximate local minima faster than gradient descent","display_name":"Finding approximate local minima faster than gradient descent","publication_year":2017,"publication_date":"2017-06-15","ids":{"openalex":"https://openalex.org/W2609037894","doi":"https://doi.org/10.1145/3055399.3055464","mag":"2609037894"},"language":"en","primary_location":{"id":"doi:10.1145/3055399.3055464","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3055399.3055464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 49th Annual ACM SIGACT Symposium on Theory of Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007875487","display_name":"Naman Agarwal","orcid":"https://orcid.org/0000-0003-0320-0238"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Naman Agarwal","raw_affiliation_strings":["Princeton University, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048981295","display_name":"Zeyuan Allen-Zhu","orcid":"https://orcid.org/0000-0003-3002-089X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeyuan Allen-Zhu","raw_affiliation_strings":["IAS, USA"],"affiliations":[{"raw_affiliation_string":"IAS, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048046167","display_name":"Brian Bullins","orcid":null},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian Bullins","raw_affiliation_strings":["Princeton University, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024431603","display_name":"Elad Hazan","orcid":"https://orcid.org/0000-0002-1566-3216"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elad Hazan","raw_affiliation_strings":["Princeton University, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101821970","display_name":"Tengyu Ma","orcid":"https://orcid.org/0000-0003-3916-5040"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tengyu Ma","raw_affiliation_strings":["Princeton University, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, USA","institution_ids":["https://openalex.org/I20089843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5007875487"],"corresponding_institution_ids":["https://openalex.org/I20089843"],"apc_list":null,"apc_paid":null,"fwci":19.3069,"has_fulltext":false,"cited_by_count":157,"citation_normalized_percentile":{"value":0.99425735,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1195","last_page":"1199"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.8518738746643066},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.7264212369918823},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.6620583534240723},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5484309196472168},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.530038595199585},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5089216232299805},{"id":"https://openalex.org/keywords/descent-direction","display_name":"Descent direction","score":0.5008220672607422},{"id":"https://openalex.org/keywords/descent","display_name":"Descent (aeronautics)","score":0.5002486705780029},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.4883527457714081},{"id":"https://openalex.org/keywords/regular-polygon","display_name":"Regular polygon","score":0.46961313486099243},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.46233412623405457},{"id":"https://openalex.org/keywords/convex-optimization","display_name":"Convex optimization","score":0.4225316345691681},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.41092386841773987},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3555690348148346},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3492888808250427},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2326681911945343},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.10911458730697632}],"concepts":[{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.8518738746643066},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.7264212369918823},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.6620583534240723},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5484309196472168},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.530038595199585},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5089216232299805},{"id":"https://openalex.org/C116149140","wikidata":"https://www.wikidata.org/wiki/Q2070951","display_name":"Descent direction","level":4,"score":0.5008220672607422},{"id":"https://openalex.org/C2776637919","wikidata":"https://www.wikidata.org/wiki/Q624380","display_name":"Descent (aeronautics)","level":2,"score":0.5002486705780029},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.4883527457714081},{"id":"https://openalex.org/C112680207","wikidata":"https://www.wikidata.org/wiki/Q714886","display_name":"Regular polygon","level":2,"score":0.46961313486099243},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.46233412623405457},{"id":"https://openalex.org/C157972887","wikidata":"https://www.wikidata.org/wiki/Q463359","display_name":"Convex optimization","level":3,"score":0.4225316345691681},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.41092386841773987},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3555690348148346},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3492888808250427},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2326681911945343},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.10911458730697632},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3055399.3055464","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3055399.3055464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 49th Annual ACM SIGACT Symposium on Theory of Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1483062147","https://openalex.org/W1850240193","https://openalex.org/W1899249567","https://openalex.org/W1913791685","https://openalex.org/W1987083649","https://openalex.org/W1994616650","https://openalex.org/W1994974865","https://openalex.org/W2006903949","https://openalex.org/W2009941369","https://openalex.org/W2079135578","https://openalex.org/W2079705627","https://openalex.org/W2098841537","https://openalex.org/W2146502635","https://openalex.org/W2146989110","https://openalex.org/W2156005216","https://openalex.org/W2205007824","https://openalex.org/W2271982975","https://openalex.org/W2279984491","https://openalex.org/W2301983558","https://openalex.org/W2396019715","https://openalex.org/W2405479119","https://openalex.org/W2474090883","https://openalex.org/W2546420264","https://openalex.org/W2559655401","https://openalex.org/W2587673585","https://openalex.org/W2596180971","https://openalex.org/W2609037894","https://openalex.org/W2688160009","https://openalex.org/W2963156201","https://openalex.org/W2963369075","https://openalex.org/W2964106499","https://openalex.org/W2964160102","https://openalex.org/W2998508934","https://openalex.org/W4245780520"],"related_works":["https://openalex.org/W3105660007","https://openalex.org/W2076165463","https://openalex.org/W2373152179","https://openalex.org/W2024879731","https://openalex.org/W3178591032","https://openalex.org/W3130242411","https://openalex.org/W1967598881","https://openalex.org/W4382238529","https://openalex.org/W2752571780","https://openalex.org/W2963367716"],"abstract_inverted_index":{"We":[0],"design":[1],"a":[2,53,60,68],"non-convex":[3,73],"second-order":[4],"optimization":[5,64],"algorithm":[6,36,57],"that":[7,47],"is":[8,43],"guaranteed":[9],"to":[10,37,51,59],"return":[11],"an":[12,39],"approximate":[13,40],"local":[14,41],"minimum":[15,42],"in":[16,21,76],"time":[17,32],"which":[18],"scales":[19],"linearly":[20],"the":[22,26],"underlying":[23],"dimension":[24],"and":[25,71],"number":[27],"of":[28,34,48,63],"training":[29,67],"examples.":[30],"The":[31],"complexity":[33],"our":[35],"find":[38,52],"even":[44],"faster":[45],"than":[46],"gradient":[49],"descent":[50],"critical":[54],"point.":[55],"Our":[56],"applies":[58],"general":[61],"class":[62],"problems":[65],"including":[66],"neural":[69],"network":[70],"other":[72],"objectives":[74],"arising":[75],"machine":[77],"learning.":[78]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":29},{"year":2020,"cited_by_count":21},{"year":2019,"cited_by_count":41},{"year":2018,"cited_by_count":20},{"year":2017,"cited_by_count":17},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
