{"id":"https://openalex.org/W7128781377","doi":"https://doi.org/10.48550/arxiv.2602.11584","title":"Gradient Compression May Hurt Generalization: A Remedy by Synthetic Data Guided Sharpness Aware Minimization","display_name":"Gradient Compression May Hurt Generalization: A Remedy by Synthetic Data Guided Sharpness Aware Minimization","publication_year":2026,"publication_date":"2026-02-12","ids":{"openalex":"https://openalex.org/W7128781377","doi":"https://doi.org/10.48550/arxiv.2602.11584"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.11584","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125906663","display_name":"Yujie Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gu, Yujie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070646892","display_name":"Richeng Jin","orcid":"https://orcid.org/0000-0002-1480-585X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Richeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125964092","display_name":"Zhaoyang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhaoyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125991842","display_name":"Huaiyu Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Huaiyu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5125906663"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.8240000009536743,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.8240000009536743,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.028999999165534973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.023800000548362732,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.7303000092506409},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.6840000152587891},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.6055999994277954},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.5903000235557556},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5497000217437744},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5400000214576721},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.5180000066757202},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.5142999887466431}],"concepts":[{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.7303000092506409},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7210999727249146},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.6840000152587891},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.6055999994277954},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.5903000235557556},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5497000217437744},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5400000214576721},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5343000292778015},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.5180000066757202},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.5142999887466431},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5034999847412109},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.46720001101493835},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3828999996185303},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3296999931335449},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.32429999113082886},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.30480000376701355},{"id":"https://openalex.org/C177918212","wikidata":"https://www.wikidata.org/wiki/Q803623","display_name":"Perturbation (astronomy)","level":2,"score":0.296999990940094},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.29499998688697815},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.27970001101493835},{"id":"https://openalex.org/C21080849","wikidata":"https://www.wikidata.org/wiki/Q13611879","display_name":"Data point","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2587999999523163}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.11584","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.11584","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.11584","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.11584","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"It":[0],"is":[1,117,123,156],"commonly":[2],"believed":[3],"that":[4,26],"gradient":[5,27,62,75],"compression":[6,28,122],"in":[7,14,33,83],"federated":[8,34],"learning":[9],"(FL)":[10],"enjoys":[11],"significant":[12],"improvement":[13],"communication":[15,108],"efficiency":[16],"with":[17,69],"negligible":[18],"performance":[19],"degradation.":[20],"In":[21,125],"this":[22,126],"paper,":[23,127],"we":[24,128],"find":[25],"induces":[29],"sharper":[30],"loss":[31],"landscapes":[32],"learning,":[35],"particularly":[36],"under":[37],"non-IID":[38],"data":[39,95,140],"distributions,":[40],"which":[41,131],"suggests":[42],"hindered":[43,118],"generalization":[44],"capability.":[45],"The":[46,150],"recently":[47],"emerging":[48],"Sharpness":[49],"Aware":[50],"Minimization":[51],"(SAM)":[52],"effectively":[53],"searches":[54],"for":[55],"a":[56,61,111],"flat":[57],"minima":[58],"by":[59],"incorporating":[60],"ascent":[63],"step":[64],"(i.e.,":[65],"perturbing":[66],"the":[67,72,78,90,102,106,133,147,153],"model":[68,103,120,135],"gradients)":[70],"before":[71],"celebrated":[73],"stochastic":[74],"descent.":[76],"Nonetheless,":[77],"direct":[79],"application":[80],"of":[81,89,146,152],"SAM":[82],"FL":[84],"suffers":[85],"from":[86,105],"inaccurate":[87],"estimation":[88,145],"global":[91,134,148],"perturbation":[92],"due":[93],"to":[94,100,137,163],"heterogeneity.":[96],"Existing":[97],"approaches":[98],"propose":[99,129],"utilize":[101],"update":[104,121],"previous":[107],"round":[109],"as":[110],"rough":[112],"estimate.":[113],"However,":[114],"its":[115,165],"effectiveness":[116],"when":[119],"incorporated.":[124],"FedSynSAM,":[130],"leverages":[132],"trajectory":[136],"construct":[138],"synthetic":[139],"and":[141,158],"facilitates":[142],"an":[143],"accurate":[144],"perturbation.":[149],"convergence":[151],"proposed":[154],"algorithm":[155],"established,":[157],"extensive":[159],"experiments":[160],"are":[161],"conducted":[162],"validate":[164],"effectiveness.":[166]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-14T00:00:00"}
