{"id":"https://openalex.org/W7128367670","doi":"https://doi.org/10.48550/arxiv.2602.06204","title":"Learning Rate Scaling across LoRA Ranks and Transfer to Full Finetuning","display_name":"Learning Rate Scaling across LoRA Ranks and Transfer to Full Finetuning","publication_year":2026,"publication_date":"2026-02-05","ids":{"openalex":"https://openalex.org/W7128367670","doi":"https://doi.org/10.48550/arxiv.2602.06204"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.06204","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125422177","display_name":"Nan Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Nan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035079908","display_name":"Soledad Villar","orcid":"https://orcid.org/0000-0003-4968-3829"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Villar, Soledad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5089509060","display_name":"Soufiane Hayou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hayou, Soufiane","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5125422177"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.6136000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.6136000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.16189999878406525,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0723000019788742,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.70169997215271},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.6958000063896179},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.6215000152587891},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.532800018787384},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.3668000102043152},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.34209999442100525},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.32749998569488525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7196999788284302},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.70169997215271},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.6958000063896179},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.6215000152587891},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.532800018787384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5311999917030334},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4593000113964081},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.3668000102043152},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.34209999442100525},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.32749998569488525},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3100999891757965},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C34585555","wikidata":"https://www.wikidata.org/wiki/Q1368723","display_name":"Learning curve","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26589998602867126}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.06204","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.06204","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.06204","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.06204","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Low-Rank":[0],"Adaptation":[1,74],"(LoRA)":[2],"is":[3,44,66,104],"a":[4,16,76,165],"standard":[5,101],"tool":[6],"for":[7,185],"parameter-efficient":[8],"finetuning":[9],"of":[10,181],"large":[11],"models.":[12],"While":[13],"it":[14,43,157],"induces":[15],"small":[17],"memory":[18],"footprint,":[19],"its":[20],"training":[21],"dynamics":[22],"can":[23],"be":[24],"surprisingly":[25],"complex":[26],"as":[27,34],"they":[28],"depend":[29],"on":[30,132,209],"several":[31],"hyperparameters":[32],"such":[33],"initialization,":[35],"adapter":[36,53,92],"rank,":[37,54],"and":[38,91,120,134,154,195,203],"learning":[39,49,61,84,125,147,169,182,197,206],"rate.":[40],"In":[41,68],"particular,":[42],"unclear":[45],"how":[46,81],"the":[47,60,64,82,107,123,145,179],"optimal":[48,124,146],"rate":[50,62,85,126,148,170,183],"scales":[51,158],"with":[52,88,160],"which":[55],"forces":[56],"practitioners":[57],"to":[58,94,174,213],"re-tune":[59],"whenever":[63],"rank":[65,93],"changed.":[67],"this":[69],"paper,":[70],"we":[71,139],"introduce":[72],"Maximal-Update":[73,108],"($\u03bc$A),":[75],"theoretical":[77],"framework":[78],"that":[79,122,167,205],"characterizes":[80],"\"optimal\"":[83],"should":[86],"scale":[87],"model":[89],"width":[90],"produce":[95],"stable,":[96],"non-vanishing":[97],"feature":[98],"updates":[99],"under":[100],"configurations.":[102],"$\u03bc$A":[103],"inspired":[105],"from":[106,117,172],"Parametrization":[109],"($\u03bc$P)":[110],"in":[111],"pretraining.":[112],"Our":[113],"analysis":[114],"leverages":[115],"techniques":[116],"hyperparameter":[118],"transfer":[119,171,211],"reveals":[121],"exhibits":[127],"different":[128],"scaling":[129,136,201],"patterns":[130],"depending":[131],"initialization":[133],"LoRA":[135,173,210],"factor.":[137],"Specifically,":[138],"identify":[140,164],"two":[141],"regimes:":[142],"one":[143],"where":[144,156],"remains":[149],"roughly":[150],"invariant":[151],"across":[152,189],"ranks,":[153],"another":[155],"inversely":[159],"rank.":[161],"We":[162],"further":[163],"configuration":[166],"allows":[168],"full":[175,186,214],"finetuning,":[176],"drastically":[177],"reducing":[178],"cost":[180],"tuning":[184],"finetuning.":[187,215],"Experiments":[188],"language,":[190],"vision,":[191],"vision--language,":[192],"image":[193],"generation,":[194],"reinforcement":[196],"tasks":[198],"validate":[199],"our":[200],"rules":[202],"show":[204],"rates":[207],"tuned":[208],"reliably":[212]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-10T00:00:00"}
