{"id":"https://openalex.org/W4416398388","doi":"https://doi.org/10.48550/arxiv.2510.07758","title":"R\u00e9nyi Sharpness: A Novel Sharpness that Strongly Correlates with Generalization","display_name":"R\u00e9nyi Sharpness: A Novel Sharpness that Strongly Correlates with Generalization","publication_year":2025,"publication_date":"2025-10-09","ids":{"openalex":"https://openalex.org/W4416398388","doi":"https://doi.org/10.48550/arxiv.2510.07758"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2510.07758","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.07758","pdf_url":"https://arxiv.org/pdf/2510.07758","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.07758","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112804295","display_name":"Qiaozhe Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Qiaozhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101947767","display_name":"Jun Sun","orcid":"https://orcid.org/0000-0002-4059-7409"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Jun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101908749","display_name":"Ruijie Zhang","orcid":"https://orcid.org/0000-0001-7894-8705"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ruijie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5104127568","display_name":"Yingzhuang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yingzhuang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.3476000130176544,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.3476000130176544,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.1128000020980835,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.08060000091791153,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7098000049591064},{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.590499997138977},{"id":"https://openalex.org/keywords/hessian-matrix","display_name":"Hessian matrix","score":0.5383999943733215},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5051000118255615},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.49880000948905945},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.4652000069618225},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.399399995803833}],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7098000049591064},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6597999930381775},{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.590499997138977},{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.5383999943733215},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5051000118255615},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.49880000948905945},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.4652000069618225},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.399399995803833},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3889999985694885},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.36090001463890076},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.3407999873161316},{"id":"https://openalex.org/C117765406","wikidata":"https://www.wikidata.org/wiki/Q5362437","display_name":"Generalization error","level":3,"score":0.3375000059604645},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3273000121116638},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32589998841285706},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.32330000400543213},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.31929999589920044},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.26600000262260437},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C2988416141","wikidata":"https://www.wikidata.org/wiki/Q6031139","display_name":"Information loss","level":2,"score":0.2524000108242035}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2510.07758","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.07758","pdf_url":"https://arxiv.org/pdf/2510.07758","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2510.07758","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.07758","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.07758","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.07758","pdf_url":"https://arxiv.org/pdf/2510.07758","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416398388.pdf","grobid_xml":"https://content.openalex.org/works/W4416398388.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sharpness":[0,221,228],"(of":[1],"the":[2,11,18,20,23,26,32,40,51,54,57,72,79,84,101,109,119,125,128,131,137,139,141,143,149,170,180,184,188,198,207,247,265],"loss":[3,85,102,134],"minima)":[4],"is":[5,42,69,104,243],"a":[6,61,217],"common":[7],"measure":[8],"to":[9,112,122,187,196,215,235],"investigate":[10],"generalization":[12,28,41,77,152,159],"of":[13,78,83,100,127,130,133,163,169,174,182,219,251],"neural":[14],"networks.":[15],"Intuitively":[16],"speaking,":[17],"flatter":[19],"landscape":[21],"near":[22],"minima":[24],"is,":[25],"better":[27],"might":[29],"be.":[30],"Unfortunately,":[31],"correlation":[33,200],"between":[34,53,151,206],"many":[35],"existing":[36,238],"sharpness":[37,63,209],"measures":[38],"and":[39,56,153,210],"usually":[43],"not":[44],"strong,":[45],"sometimes":[46],"even":[47],"weak.":[48],"To":[49,146],"close":[50],"gap":[52],"intuition":[55],"reality,":[58],"we":[59,94,117,156,213],"propose":[60,214],"novel":[62],"measure,":[64],"i.e.,":[65,226],"\\textit{R\u00e9nyi":[66,120],"sharpness},":[67],"which":[68,232],"defined":[70],"as":[71,91,177,179,222,258,260],"negative":[73],"R\u00e9nyi":[74,164,175,208,220,227],"entropy":[75],"(a":[76],"classical":[80,266],"Shannon":[81],"entropy)":[82],"Hessian.":[86,135],"The":[87],"main":[88],"ideas":[89],"are":[90,194],"follows:":[92],"1)":[93],"realize":[95],"that":[96,246],"\\textit{uniform}":[97],"(identical)":[98],"eigenvalues":[99,132],"Hessian":[103],"most":[105],"desirable":[106],"(while":[107],"keeping":[108],"sum":[110],"constant)":[111],"achieve":[113],"good":[114],"generalization;":[115],"2)":[116],"employ":[118],"entropy}":[121],"concisely":[123],"characterize":[124],"extent":[126],"spread":[129],"Normally,":[136],"larger":[138],"spread,":[140],"smaller":[142],"(R\u00e9nyi)":[144,154],"entropy.":[145],"rigorously":[147],"establish":[148],"relationship":[150],"sharpness,":[155,165,176],"provide":[157],"several":[158],"bounds":[160],"in":[161],"terms":[162],"by":[166],"taking":[167],"advantage":[168],"reparametrization":[171],"invariance":[172],"property":[173],"well":[178],"trick":[181],"translating":[183],"data":[185],"discrepancy":[186],"weight":[189],"perturbation.":[190],"Furthermore,":[191],"extensive":[192],"experiments":[193],"conducted":[195],"verify":[197],"strong":[199],"(in":[201],"specific,":[202],"Kendall":[203],"rank":[204],"correlation)":[205],"generalization.":[211],"Moreover,":[212],"use":[216],"variant":[218],"regularizer":[223],"during":[224],"training,":[225],"Aware":[229],"Minimization":[230],"(RSAM),":[231],"turns":[233],"out":[234],"outperform":[236],"all":[237],"sharpness-aware":[239],"minimization":[240],"methods.":[241],"It":[242],"worthy":[244],"noting":[245],"test":[248],"accuracy":[249],"gain":[250],"our":[252],"proposed":[253],"RSAM":[254],"method":[255],"could":[256],"be":[257],"high":[259],"nearly":[261],"2.5\\%,":[262],"compared":[263],"against":[264],"SAM":[267],"method.":[268]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-11T00:00:00"}
