{"id":"https://openalex.org/W4401608529","doi":"https://doi.org/10.1109/tpami.2024.3444002","title":"Sharpness-Aware Lookahead for Accelerating Convergence and Improving Generalization","display_name":"Sharpness-Aware Lookahead for Accelerating Convergence and Improving Generalization","publication_year":2024,"publication_date":"2024-08-15","ids":{"openalex":"https://openalex.org/W4401608529","doi":"https://doi.org/10.1109/tpami.2024.3444002","pmid":"https://pubmed.ncbi.nlm.nih.gov/39146156"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2024.3444002","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3444002","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006231066","display_name":"Chengli Tan","orcid":"https://orcid.org/0000-0002-7091-898X"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chengli Tan","raw_affiliation_strings":["School of Mathematics and Statistics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Statistics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024880206","display_name":"Jiangshe Zhang","orcid":"https://orcid.org/0000-0002-8395-1180"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiangshe Zhang","raw_affiliation_strings":["School of Mathematics and Statistics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Statistics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014312349","display_name":"Junmin Liu","orcid":"https://orcid.org/0000-0002-1462-7248"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junmin Liu","raw_affiliation_strings":["School of Mathematics and Statistics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Statistics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100687952","display_name":"Yihong Gong","orcid":"https://orcid.org/0000-0002-1793-5836"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yihong Gong","raw_affiliation_strings":["College of Software Engineering, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"College of Software Engineering, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5006231066"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.4986,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.6410294,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"46","issue":"12","first_page":"10375","last_page":"10388"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.8982999920845032,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.8982999920845032,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.8587999939918518,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.8327999711036682,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.736009418964386},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.69114750623703},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6726005673408508},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.547002375125885},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3562559485435486},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.34670698642730713},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19588756561279297}],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.736009418964386},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.69114750623703},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6726005673408508},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.547002375125885},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3562559485435486},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.34670698642730713},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19588756561279297},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2024.3444002","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3444002","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:39146156","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39146156","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3899757686","display_name":null,"funder_award_id":"12371512","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4210075541","display_name":null,"funder_award_id":"62276208","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4976875354","display_name":null,"funder_award_id":"2024JC-JCQN-02","funder_id":"https://openalex.org/F4320336567","funder_display_name":"Natural Science Basic Research Program of Shaanxi Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320336567","display_name":"Natural Science Basic Research Program of Shaanxi Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":90,"referenced_works":["https://openalex.org/W114517082","https://openalex.org/W194391859","https://openalex.org/W1632114991","https://openalex.org/W1861492603","https://openalex.org/W2064675550","https://openalex.org/W2108598243","https://openalex.org/W2112796928","https://openalex.org/W2194775991","https://openalex.org/W2552194003","https://openalex.org/W2565639579","https://openalex.org/W2776855315","https://openalex.org/W2904243021","https://openalex.org/W2912811302","https://openalex.org/W2949736877","https://openalex.org/W2962971773","https://openalex.org/W2963150697","https://openalex.org/W2963173418","https://openalex.org/W2964110616","https://openalex.org/W2964137095","https://openalex.org/W2980149079","https://openalex.org/W2992308087","https://openalex.org/W3018960895","https://openalex.org/W3093871477","https://openalex.org/W3094502228","https://openalex.org/W3096312061","https://openalex.org/W3118608800","https://openalex.org/W3138582970","https://openalex.org/W3149313975","https://openalex.org/W3207706861","https://openalex.org/W4206742934","https://openalex.org/W4250589301","https://openalex.org/W4282939141","https://openalex.org/W4307286264","https://openalex.org/W4312984277","https://openalex.org/W4320912214","https://openalex.org/W4379470700","https://openalex.org/W4380559103","https://openalex.org/W4385565434","https://openalex.org/W4386076062","https://openalex.org/W6631190155","https://openalex.org/W6637461557","https://openalex.org/W6638214083","https://openalex.org/W6638926925","https://openalex.org/W6726497184","https://openalex.org/W6726983090","https://openalex.org/W6727099177","https://openalex.org/W6736583452","https://openalex.org/W6737496325","https://openalex.org/W6738491991","https://openalex.org/W6747043858","https://openalex.org/W6747381837","https://openalex.org/W6747620207","https://openalex.org/W6748278370","https://openalex.org/W6749107692","https://openalex.org/W6756630351","https://openalex.org/W6757185729","https://openalex.org/W6757817989","https://openalex.org/W6758153731","https://openalex.org/W6766196973","https://openalex.org/W6769349878","https://openalex.org/W6771233102","https://openalex.org/W6778239092","https://openalex.org/W6780943123","https://openalex.org/W6783600611","https://openalex.org/W6784447870","https://openalex.org/W6784591795","https://openalex.org/W6785013134","https://openalex.org/W6787972765","https://openalex.org/W6790330898","https://openalex.org/W6790416159","https://openalex.org/W6790545018","https://openalex.org/W6796289114","https://openalex.org/W6803202288","https://openalex.org/W6803727029","https://openalex.org/W6810029475","https://openalex.org/W6810125279","https://openalex.org/W6810200297","https://openalex.org/W6810673746","https://openalex.org/W6838349824","https://openalex.org/W6839021495","https://openalex.org/W6849243157","https://openalex.org/W6849569567","https://openalex.org/W6849777495","https://openalex.org/W6853066937","https://openalex.org/W6853349992","https://openalex.org/W6853548913","https://openalex.org/W6853599841","https://openalex.org/W6854757378","https://openalex.org/W7034108470","https://openalex.org/W7052457178"],"related_works":["https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W3162204513","https://openalex.org/W2371138613","https://openalex.org/W2048963458","https://openalex.org/W43109613","https://openalex.org/W2359952343","https://openalex.org/W2239445980","https://openalex.org/W2080152487","https://openalex.org/W3083152911"],"abstract_inverted_index":{"Lookahead":[0,21,44],"is":[1,75,98,105,166],"a":[2,46,79],"popular":[3],"stochastic":[4],"optimizer":[5,48],"that":[6,49,55,168],"can":[7,180],"accelerate":[8],"the":[9,17,60,67,70,83,93,112,123,136,142,159,176,182,191,195],"training":[10,61,192],"process":[11,62],"of":[12,82,115,125,141,161,194],"deep":[13],"neural":[14,153],"networks.":[15],"However,":[16],"solutions":[18],"found":[19,27],"by":[20,28,77,100],"often":[22],"generalize":[23,56],"worse":[24],"than":[25,175],"those":[26],"its":[29],"base":[30,137,177,196],"optimizers,":[31],"such":[32],"as":[33,146,148,186],"SGD":[34],"and":[35,156],"Adam.":[36],"To":[37],"address":[38],"this":[39],"issue,":[40],"we":[41],"propose":[42],"Sharpness-Aware":[43,101],"(SALA),":[45],"novel":[47],"aims":[50],"to":[51,119,135],"identify":[52],"flat":[53,73],"minima":[54],"well.":[57],"SALA":[58,121,162,179],"divides":[59],"into":[63],"two":[64],"stages.":[65],"In":[66,92,117],"first":[68],"stage,":[69,95],"direction":[71],"towards":[72],"regions":[74],"determined":[76,99],"leveraging":[78],"quadratic":[80],"approximation":[81],"optimization":[84],"trajectory,":[85],"without":[86],"incurring":[87],"any":[88],"extra":[89],"computational":[90,173],"overhead.":[91],"second":[94],"however,":[96],"it":[97],"Minimization":[102],"(SAM),":[103],"which":[104,188],"particularly":[106],"effective":[107],"in":[108],"improving":[109],"generalization":[110,132,184],"at":[111],"terminal":[113],"phase":[114],"training.":[116],"contrast":[118],"Lookahead,":[120],"retains":[122],"benefits":[124],"accelerated":[126],"convergence":[127],"while":[128],"also":[129],"enjoying":[130],"superior":[131],"performance":[133,185],"compared":[134],"optimizer.":[138,197],"Theoretical":[139],"analysis":[140],"expected":[143],"excess":[144],"risk,":[145],"well":[147],"empirical":[149],"results":[150],"on":[151],"canonical":[152],"network":[154],"architectures":[155],"datasets,":[157],"demonstrate":[158],"advantages":[160],"over":[163],"Lookahead.":[164],"It":[165],"noteworthy":[167],"with":[169],"approximately":[170],"25%":[171],"more":[172],"overhead":[174],"optimizer,":[178],"achieve":[181],"same":[183],"SAM":[187],"requires":[189],"twice":[190],"budget":[193]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
