{"id":"https://openalex.org/W7128499965","doi":"https://doi.org/10.48550/arxiv.2602.09006","title":"ARO: A New Lens On Matrix Optimization For Large Models","display_name":"ARO: A New Lens On Matrix Optimization For Large Models","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7128499965","doi":"https://doi.org/10.48550/arxiv.2602.09006"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.09006","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125535385","display_name":"Wenbo Gong","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gong, Wenbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020455236","display_name":"Javier Zazo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zazo, Javier","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125539476","display_name":"Qijun Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Qijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047540105","display_name":"Puqian Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Puqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125547220","display_name":"James Hensman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hensman, James","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125528180","display_name":"Chao Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Chao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5125535385"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.3691999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.3691999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.11569999903440475,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10963","display_name":"Advanced Optimization Algorithms Research","score":0.0364999994635582,"subfield":{"id":"https://openalex.org/subfields/2612","display_name":"Numerical Analysis"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/orthogonalization","display_name":"Orthogonalization","score":0.8112000226974487},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.6140999794006348},{"id":"https://openalex.org/keywords/rotation","display_name":"Rotation (mathematics)","score":0.5586000084877014},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.5184999704360962},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5052000284194946},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.49810001254081726},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4187000095844269},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.4156999886035919},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.40700000524520874},{"id":"https://openalex.org/keywords/coordinate-descent","display_name":"Coordinate descent","score":0.4032000005245209}],"concepts":[{"id":"https://openalex.org/C47559304","wikidata":"https://www.wikidata.org/wiki/Q1702189","display_name":"Orthogonalization","level":2,"score":0.8112000226974487},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.6140999794006348},{"id":"https://openalex.org/C74050887","wikidata":"https://www.wikidata.org/wiki/Q848368","display_name":"Rotation (mathematics)","level":2,"score":0.5586000084877014},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.5184999704360962},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5052000284194946},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5011000037193298},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.49810001254081726},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.46299999952316284},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.46050000190734863},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4187000095844269},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.4156999886035919},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.40700000524520874},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4065000116825104},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4041999876499176},{"id":"https://openalex.org/C157553263","wikidata":"https://www.wikidata.org/wiki/Q5168004","display_name":"Coordinate descent","level":2,"score":0.4032000005245209},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3873000144958496},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.37459999322891235},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.37380000948905945},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.3601999878883362},{"id":"https://openalex.org/C2777305511","wikidata":"https://www.wikidata.org/wiki/Q1813469","display_name":"Centring","level":2,"score":0.3555000126361847},{"id":"https://openalex.org/C83633838","wikidata":"https://www.wikidata.org/wiki/Q1256564","display_name":"Rotation matrix","level":2,"score":0.3538999855518341},{"id":"https://openalex.org/C197947376","wikidata":"https://www.wikidata.org/wiki/Q5155608","display_name":"Comparability","level":2,"score":0.32409998774528503},{"id":"https://openalex.org/C15336307","wikidata":"https://www.wikidata.org/wiki/Q1766051","display_name":"Lens (geology)","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.3075000047683716},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.30480000376701355},{"id":"https://openalex.org/C2780427248","wikidata":"https://www.wikidata.org/wiki/Q17014996","display_name":"Fundamental matrix (linear differential equation)","level":2,"score":0.30160000920295715},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.2800999879837036},{"id":"https://openalex.org/C165443888","wikidata":"https://www.wikidata.org/wiki/Q1482183","display_name":"Transformation matrix","level":3,"score":0.27790001034736633},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C17137986","wikidata":"https://www.wikidata.org/wiki/Q215067","display_name":"Orthogonality","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.26339998841285706}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.09006","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.09006","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.09006","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.09006","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Matrix-based":[0],"optimizers":[1],"have":[2],"attracted":[3],"growing":[4],"interest":[5],"for":[6],"improving":[7,98],"LLM":[8,63,135],"training":[9,64],"efficiency,":[10],"with":[11],"significant":[12],"progress":[13],"centered":[14],"on":[15],"orthogonalization/whitening":[16],"based":[17],"methods.":[18],"While":[19],"yielding":[20],"substantial":[21],"performance":[22],"gains,":[23],"a":[24,46,56,71,81,109,163],"fundamental":[25],"question":[26],"arises:":[27],"can":[28,159],"we":[29,107,155],"develop":[30],"new":[31,47],"paradigms":[32],"beyond":[33,92],"orthogonalization,":[34],"pushing":[35],"the":[36,76],"efficiency":[37,100],"frontier":[38],"further?":[39],"We":[40],"present":[41],"\\textbf{Adaptively":[42],"Rotated":[43],"Optimization":[44],"(ARO},":[45],"matrix":[48],"optimization":[49],"framework":[50],"that":[51,90,114,176],"treats":[52],"gradient":[53],"rotation":[54,77],"as":[55,162],"first":[57],"class":[58],"design":[59],"principle.":[60],"ARO":[61,122,158],"accelerates":[62],"by":[65,80],"performing":[66],"normed":[67],"steepest":[68],"descent":[69],"in":[70,101,134,167],"rotated":[72],"coordinate":[73],"system,":[74],"where":[75],"is":[78],"determined":[79],"novel":[82],"norm-informed":[83],"policy.":[84],"This":[85],"perspective":[86],"yields":[87],"update":[88],"rules":[89],"go":[91],"existing":[93],"orthogonalization":[94,130],"and":[95,117,129,143],"whitening":[96],"optimizers,":[97],"sample":[99],"practice.":[102],"To":[103],"make":[104],"comparisons":[105],"reliable,":[106],"propose":[108],"rigorously":[110],"controlled":[111],"benchmarking":[112],"protocol":[113],"reduces":[115],"confounding":[116],"bias.":[118],"Under":[119],"this":[120],"protocol,":[121],"consistently":[123],"outperforms":[124],"AdamW":[125],"(by":[126,132],"1.3":[127],"$\\sim$1.35$\\times$)":[128],"methods":[131],"1.1$\\sim$1.15$\\times$)":[133],"pretraining":[136],"at":[137],"up":[138,144],"to":[139,145],"8B":[140],"activated":[141],"parameters,":[142],"$8\\times$":[146],"overtrain":[147],"budget,":[148],"without":[149],"evidence":[150],"of":[151,170,181],"diminishing":[152],"returns.":[153],"Finally,":[154],"discuss":[156],"how":[157],"be":[160],"reformulated":[161],"symmetry-aware":[164],"optimizer":[165],"grounded":[166],"rotational":[168],"symmetries":[169],"residual":[171],"streams,":[172],"motivating":[173],"advanced":[174],"designs":[175],"enable":[177],"computationally":[178],"efficient":[179],"exploitation":[180],"cross-layer/cross":[182],"module":[183],"couplings.":[184]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-11T00:00:00"}
