{"id":"https://openalex.org/W7131371417","doi":"https://doi.org/10.48550/arxiv.2602.20937","title":"Extending $\u03bc$P: Spectral Conditions for Feature Learning Across Optimizers","display_name":"Extending $\u03bc$P: Spectral Conditions for Feature Learning Across Optimizers","publication_year":2026,"publication_date":"2026-02-24","ids":{"openalex":"https://openalex.org/W7131371417","doi":"https://doi.org/10.48550/arxiv.2602.20937"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.20937","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101997736","display_name":"Akshita Gupta","orcid":"https://orcid.org/0000-0001-8898-7767"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gupta, Akshita","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071682789","display_name":"Mari\u00e8me Ngom","orcid":"https://orcid.org/0000-0002-3240-569X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ngom, Marieme","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028293798","display_name":"Sam Foreman","orcid":"https://orcid.org/0000-0002-9981-0876"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Foreman, Sam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126817644","display_name":"Venkatram Vishwanath","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vishwanath, Venkatram","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101997736"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.42660000920295715,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.42660000920295715,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.08799999952316284,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.06949999928474426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.6049000024795532},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5604000091552734},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5099999904632568},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4772000014781952},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4390999972820282},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.42730000615119934},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.4018000066280365}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7114999890327454},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.6049000024795532},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5604000091552734},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5336999893188477},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5099999904632568},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4772000014781952},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47099998593330383},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4390999972820282},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.42730000615119934},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.4018000066280365},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.325300008058548},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.31679999828338623},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2913999855518341},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2703999876976013},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.25929999351501465}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.20937","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.20937","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20937","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.20937","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Several":[0],"variations":[1],"of":[2,18,24,34,53,64,138],"adaptive":[3],"first-order":[4],"and":[5,14,93,146,157],"second-order":[6],"optimization":[7,26],"methods":[8],"have":[9],"been":[10],"proposed":[11],"to":[12,31,41,58,79,82,110,123,131],"accelerate":[13],"scale":[15],"the":[16,32,60,65,70,103,168],"training":[17],"large":[19],"language":[20],"models.":[21,45],"The":[22],"performance":[23],"these":[25,180],"routines":[27],"is":[28,50,100,108],"highly":[29],"sensitive":[30],"choice":[33],"hyperparameters":[35],"(HPs),":[36],"which":[37,56],"are":[38],"computationally":[39],"expensive":[40],"tune":[42],"for":[43,91,97,134,167,179],"large-scale":[44],"Maximal":[46],"update":[47],"parameterization":[48,178],"$(\u03bc$P$)$":[49],"a":[51,74,84,128,135],"set":[52],"scaling":[54],"rules":[55],"aims":[57],"make":[59],"optimal":[61],"HPs":[62,71],"independent":[63],"model":[66,78,165],"size,":[67],"thereby":[68],"allowing":[69],"tuned":[72],"on":[73,113,153],"smaller":[75],"(computationally":[76],"cheaper)":[77],"be":[80],"transferred":[81],"train":[83],"larger,":[85],"target":[86],"model.":[87],"Despite":[88],"promising":[89],"results":[90],"SGD":[92],"Adam,":[94],"deriving":[95],"$\u03bc$P":[96,133,151],"other":[98],"optimizers":[99],"challenging":[101],"because":[102],"underlying":[104],"tensor":[105,124],"programming":[106],"approach":[107],"difficult":[109],"grasp.":[111],"Building":[112],"recent":[114],"work":[115],"that":[116],"introduced":[117],"spectral":[118],"conditions":[119],"as":[120],"an":[121],"alternative":[122],"programs,":[125],"we":[126,172],"propose":[127],"novel":[129],"framework":[130],"derive":[132],"broader":[136],"class":[137],"optimizers,":[139],"including":[140],"AdamW,":[141],"ADOPT,":[142],"LAMB,":[143],"Sophia,":[144],"Shampoo":[145],"Muon.":[147],"We":[148],"implement":[149],"our":[150],"derivations":[152],"multiple":[154],"benchmark":[155],"models":[156],"demonstrate":[158],"zero-shot":[159],"learning":[160],"rate":[161],"transfer":[162],"across":[163],"increasing":[164],"width":[166],"above":[169],"optimizers.":[170,181],"Further,":[171],"provide":[173],"empirical":[174],"insights":[175],"into":[176],"depth-scaling":[177]},"counts_by_year":[],"updated_date":"2026-05-03T08:25:01.440150","created_date":"2026-02-26T00:00:00"}
