{"id":"https://openalex.org/W7161023354","doi":"https://doi.org/10.48550/arxiv.2605.11181","title":"Muon is Not That Special: Random or Inverted Spectra Work Just as Well","display_name":"Muon is Not That Special: Random or Inverted Spectra Work Just as Well","publication_year":2026,"publication_date":"2026-05-11","ids":{"openalex":"https://openalex.org/W7161023354","doi":"https://doi.org/10.48550/arxiv.2605.11181"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.11181","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11181","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.11181","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087214686","display_name":"Zakhar Shumaylov","orcid":"https://orcid.org/0000-0001-7087-4393"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shumaylov, Zakhar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136060582","display_name":"Natha\u00ebl Da Costa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Da Costa, Natha\u00ebl","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114452711","display_name":"Peter Zaika","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zaika, Peter","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045234906","display_name":"B\u00e1lint Mucs\u00e1nyi","orcid":"https://orcid.org/0000-0002-7075-9018"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mucs\u00e1nyi, B\u00e1lint","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099282196","display_name":"Alex Massucco","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Massucco, Alex","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136026881","display_name":"Yoav Gelberg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gelberg, Yoav","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033880300","display_name":"Carola\u2010Bibiane Sch\u00f6nlieb","orcid":"https://orcid.org/0000-0003-0099-6306"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sch\u00f6nlieb, Carola-Bibiane","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029186201","display_name":"Yarin Gal","orcid":"https://orcid.org/0000-0002-2733-2078"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gal, Yarin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5022872779","display_name":"Philipp Hennig","orcid":"https://orcid.org/0000-0001-7293-6092"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hennig, Philipp","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12579","display_name":"Muon and positron interactions and applications","score":0.3409999907016754,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12579","display_name":"Muon and positron interactions and applications","score":0.3409999907016754,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10921","display_name":"Neutrino Physics Research","score":0.14569999277591705,"subfield":{"id":"https://openalex.org/subfields/3106","display_name":"Nuclear and High Energy Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.11640000343322754,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.40059998631477356},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.39419999718666077},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.37700000405311584},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.3630000054836273},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.3578000068664551},{"id":"https://openalex.org/keywords/ideal","display_name":"Ideal (ethics)","score":0.35580000281333923},{"id":"https://openalex.org/keywords/observer","display_name":"Observer (physics)","score":0.35530000925064087},{"id":"https://openalex.org/keywords/descent","display_name":"Descent (aeronautics)","score":0.33869999647140503}],"concepts":[{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.47699999809265137},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.40059998631477356},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.39419999718666077},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.37700000405311584},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.3630000054836273},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.3578000068664551},{"id":"https://openalex.org/C2776639384","wikidata":"https://www.wikidata.org/wiki/Q840396","display_name":"Ideal (ethics)","level":2,"score":0.35580000281333923},{"id":"https://openalex.org/C2780704645","wikidata":"https://www.wikidata.org/wiki/Q9251458","display_name":"Observer (physics)","level":2,"score":0.35530000925064087},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3544999957084656},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3398999869823456},{"id":"https://openalex.org/C2776637919","wikidata":"https://www.wikidata.org/wiki/Q624380","display_name":"Descent (aeronautics)","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C205334942","wikidata":"https://www.wikidata.org/wiki/Q3151","display_name":"Muon","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.31850001215934753},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.30230000615119934},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.28929999470710754},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.28859999775886536},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C184720557","wikidata":"https://www.wikidata.org/wiki/Q7825049","display_name":"Topology (electrical circuits)","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2685999870300293},{"id":"https://openalex.org/C14961307","wikidata":"https://www.wikidata.org/wiki/Q5377176","display_name":"Energy minimization","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.2549000084400177},{"id":"https://openalex.org/C7305733","wikidata":"https://www.wikidata.org/wiki/Q207961","display_name":"Geometric shape","level":2,"score":0.25270000100135803},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.25270000100135803},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.25040000677108765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.11181","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11181","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.11181","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11181","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"recent":[1],"empirical":[2],"success":[3],"of":[4,57,119,170],"the":[5,45,83,87,96,167],"Muon":[6,218],"optimizer":[7,126,188],"has":[8],"renewed":[9],"interest":[10],"in":[11],"non-Euclidean":[12],"optimization,":[13],"typically":[14],"justified":[15],"by":[16,64,104,178,221,228],"similarities":[17],"with":[18,131],"second-order":[19],"methods,":[20],"and":[21,77,99,144,183],"linear":[22],"minimization":[23],"oracle":[24],"(LMO)":[25],"theory.":[26],"In":[27],"this":[28,32],"paper,":[29],"we":[30,52,121,172],"challenge":[31],"geometric":[33,41,138],"narrative":[34],"through":[35],"three":[36],"contributions,":[37],"demonstrating":[38],"that":[39,111,127,150,163],"precise":[40,155,216],"structure":[42],"is":[43,157,165,175],"not":[44,166,220],"key":[46],"factor":[47],"affecting":[48],"optimization":[49],"performance.":[50],"First,":[51],"introduce":[53,122],"Freon,":[54],"a":[55,65,116,154,209,215],"family":[56],"optimizers":[58],"based":[59],"on":[60],"Schatten":[61,89],"(quasi-)norms,":[62],"powered":[63],"novel,":[66],"provably":[67],"optimal":[68],"QDWH-based":[69],"iterative":[70],"approximation.":[71],"Freon":[72,112],"naturally":[73],"interpolates":[74],"between":[75],"SGD":[76],"Muon,":[78],"while":[79],"smoothly":[80],"extrapolating":[81],"into":[82],"quasi-norm":[84,97],"regime.":[85],"Empirically,":[86],"best-performing":[88],"parameters":[90],"for":[91],"GPT-2":[92],"lie":[93],"strictly":[94],"within":[95,208],"regime,":[98],"thus":[100],"cannot":[101],"be":[102],"represented":[103],"any":[105,136],"unitarily":[106],"invariant":[107],"LMO.":[108],"Second,":[109],"noting":[110],"performs":[113],"well":[114],"across":[115],"wide":[117],"range":[118],"exponents,":[120],"Kaon,":[123],"an":[124,223],"absurd":[125],"replaces":[128],"singular":[129],"values":[130],"random":[132,211],"noise.":[133],"Despite":[134],"lacking":[135],"coherent":[137],"structure,":[139],"Kaon":[140],"matches":[141],"Muon's":[142],"performance":[143],"retains":[145],"classical":[146],"convergence":[147],"guarantees,":[148],"proving":[149],"strict":[151],"adherence":[152],"to":[153,203],"geometry":[156,164],"practically":[158],"irrelevant.":[159],"Third,":[160],"having":[161],"shown":[162],"primary":[168],"driver":[169],"performance,":[171],"demonstrate":[173],"it":[174],"instead":[176],"controlled":[177],"two":[179,196],"local":[180],"quantities:":[181],"alignment":[182],"descent":[184],"potential.":[185],"Ultimately,":[186],"each":[187],"must":[189],"tune":[190],"its":[191],"step":[192],"size":[193],"around":[194],"these":[195],"quantities.":[197],"While":[198],"their":[199],"dynamics":[200],"are":[201],"difficult":[202],"predict":[204],"a-priori,":[205],"evaluating":[206],"them":[207],"stochastic":[210],"feature":[212],"model":[213],"yields":[214],"insight:":[217],"succeeds":[219],"tracking":[222],"ideal":[224],"global":[225],"geometry,":[226],"but":[227],"guaranteeing":[229],"step-size":[230],"optimality.":[231]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-14T00:00:00"}
