{"id":"https://openalex.org/W7131391136","doi":"https://doi.org/10.48550/arxiv.2602.19271","title":"Taming Preconditioner Drift: Unlocking the Potential of Second-Order Optimizers for Federated Learning on Non-IID Data","display_name":"Taming Preconditioner Drift: Unlocking the Potential of Second-Order Optimizers for Federated Learning on Non-IID Data","publication_year":2026,"publication_date":"2026-02-22","ids":{"openalex":"https://openalex.org/W7131391136","doi":"https://doi.org/10.48550/arxiv.2602.19271"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.19271","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19271","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.19271","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048532598","display_name":"Junkang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Junkang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126852105","display_name":"Fanhua Shang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shang, Fanhua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126844873","display_name":"Hongying Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Hongying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126791676","display_name":"Jin Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071704046","display_name":"Weixin An","orcid":"https://orcid.org/0000-0002-9466-2851"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"An, Weixin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126836277","display_name":"Yuanyuan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yuanyuan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5048532598"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.8234999775886536,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.8234999775886536,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.04650000110268593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.023600000888109207,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preconditioner","display_name":"Preconditioner","score":0.722599983215332},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6574000120162964},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.6198999881744385},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5967000126838684},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5928000211715698},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5813999772071838},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.47909998893737793},{"id":"https://openalex.org/keywords/descent","display_name":"Descent (aeronautics)","score":0.4375999867916107}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7468000054359436},{"id":"https://openalex.org/C167431342","wikidata":"https://www.wikidata.org/wiki/Q1754327","display_name":"Preconditioner","level":3,"score":0.722599983215332},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6574000120162964},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.6198999881744385},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5967000126838684},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5928000211715698},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5813999772071838},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.47909998893737793},{"id":"https://openalex.org/C2776637919","wikidata":"https://www.wikidata.org/wiki/Q624380","display_name":"Descent (aeronautics)","level":2,"score":0.4375999867916107},{"id":"https://openalex.org/C164752517","wikidata":"https://www.wikidata.org/wiki/Q5570875","display_name":"Global optimization","level":2,"score":0.4348999857902527},{"id":"https://openalex.org/C2992525071","wikidata":"https://www.wikidata.org/wiki/Q50818671","display_name":"Federated learning","level":2,"score":0.4034000039100647},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.39910000562667847},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3700000047683716},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.367000013589859},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35910001397132874},{"id":"https://openalex.org/C157553263","wikidata":"https://www.wikidata.org/wiki/Q5168004","display_name":"Coordinate descent","level":2,"score":0.35269999504089355},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.30709999799728394},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.30230000615119934},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.29249998927116394},{"id":"https://openalex.org/C56435381","wikidata":"https://www.wikidata.org/wiki/Q1196371","display_name":"Geometric transformation","level":3,"score":0.2858000099658966},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.260699987411499},{"id":"https://openalex.org/C108734733","wikidata":"https://www.wikidata.org/wiki/Q1172333","display_name":"Data synchronization","level":3,"score":0.25929999351501465}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.19271","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19271","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.19271","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19271","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Second-order":[0],"optimizers":[1],"can":[2],"significantly":[3],"accelerate":[4],"large-scale":[5],"training,":[6],"yet":[7],"their":[8],"naive":[9],"federated":[10,71],"variants":[11],"are":[12],"often":[13],"unstable":[14],"or":[15],"even":[16],"diverge":[17],"on":[18,146],"non-IID":[19],"data.":[20],"We":[21,115],"show":[22],"that":[23],"a":[24,63,89,107],"key":[25],"culprit":[26],"is":[27,151],"\\emph{preconditioner":[28,64],"drift}:":[29],"client-side":[30],"second-order":[31,72],"training":[32],"induces":[33],"heterogeneous":[34],"\\emph{curvature-defined":[35],"geometries}":[36],"(i.e.,":[37,101],"preconditioner":[38],"coordinate":[39],"systems),":[40],"and":[41,66,92,98,132,136],"server-side":[42],"model":[43],"averaging":[44],"updates":[45,105],"computed":[46],"under":[47,124],"incompatible":[48],"metrics,":[49],"corrupting":[50],"the":[51],"global":[52,90,96,108],"descent":[53],"direction.":[54],"To":[55],"address":[56],"this":[57],"geometric":[58],"mismatch,":[59],"we":[60],"propose":[61],"\\texttt{FedPAC},":[62],"alignment":[65],"correction}":[67],"framework":[68],"for":[69],"reliable":[70],"optimization.":[73],"\\texttt{FedPAC}":[74,128],"explicitly":[75],"decouples":[76],"parameter":[77],"aggregation":[78],"from":[79],"geometry":[80],"synchronization":[81],"by:":[82],"(i)":[83],"\\textbf{Alignment}":[84],"(i.e.,aggregating":[85],"local":[86,103],"preconditioners":[87],"into":[88],"reference":[91],"warm-starting":[93],"clients":[94],"via":[95],"preconditioner);":[97],"(ii)":[99],"\\textbf{Correction}":[100],"steering":[102],"preconditioned":[104,109],"using":[106],"direction":[110],"to":[111,141],"suppress":[112],"long-term":[113],"drift).":[114],"provide":[116],"drift-coupled":[117],"non-convex":[118],"convergence":[119],"guarantees":[120],"with":[121,148],"linear":[122],"speedup":[123],"partial":[125],"participation.":[126],"Empirically,":[127],"consistently":[129],"improves":[130],"stability":[131],"accuracy":[133,144],"across":[134],"vision":[135],"language":[137],"tasks,":[138],"achieving":[139],"up":[140],"$5.8\\%$":[142],"absolute":[143],"gain":[145],"CIFAR-100":[147],"ViTs.":[149],"Code":[150],"available":[152],"at":[153],"https://anonymous.4open.science/r/FedPAC-8B24.":[154]},"counts_by_year":[],"updated_date":"2026-02-26T06:34:08.959763","created_date":"2026-02-26T00:00:00"}
