{"id":"https://openalex.org/W7161759270","doi":"https://doi.org/10.48550/arxiv.2605.20005","title":"Fine-Tuning Without Forgetting via Loss-Adaptive Learning Rates","display_name":"Fine-Tuning Without Forgetting via Loss-Adaptive Learning Rates","publication_year":2026,"publication_date":"2026-05-19","ids":{"openalex":"https://openalex.org/W7161759270","doi":"https://doi.org/10.48550/arxiv.2605.20005"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.20005","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.20005","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.20005","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082514869","display_name":"Parjanya Prashant","orcid":"https://orcid.org/0000-0003-1718-3037"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prashant, Parjanya Prajakta","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136509020","display_name":"Jiongli Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jiongli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136578162","display_name":"Aldan Creo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Creo, Aldan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103209063","display_name":"Babak Salimi","orcid":"https://orcid.org/0000-0003-2485-9533"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Salimi, Babak","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.6812000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.6812000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.13740000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.04639999940991402,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.8758000135421753},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5979999899864197},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4178999960422516},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.3666999936103821},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.35370001196861267},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.32249999046325684},{"id":"https://openalex.org/keywords/product","display_name":"Product (mathematics)","score":0.30079999566078186}],"concepts":[{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.8758000135421753},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6549999713897705},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5979999899864197},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5432000160217285},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4178999960422516},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4000999927520752},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.3666999936103821},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.35370001196861267},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.32249999046325684},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.30079999566078186},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2955999970436096},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.2816999852657318},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C2778067643","wikidata":"https://www.wikidata.org/wiki/Q166507","display_name":"Interval (graph theory)","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2587999999523163},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.20005","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.20005","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.20005","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.20005","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Fine-tuning":[0],"large":[1],"language":[2,147],"models":[3],"on":[4,125,155],"new":[5,44],"data":[6],"improves":[7],"task":[8,160],"performance":[9,161],"but":[10,37],"degrades":[11],"capabilities":[12],"learned":[13],"during":[14,199],"pretraining,":[15],"a":[16,72,116],"phenomenon":[17],"known":[18],"as":[19,131],"catastrophic":[20],"forgetting.":[21,108],"Existing":[22],"methods":[23],"mitigate":[24],"this":[25,111],"by":[26,82,110,153,173],"modifying":[27],"the":[28,83,86,90,94,122,132,137,159],"fine-tuning":[29,138],"objective":[30,139],"to":[31,60,106,195],"suppress":[32],"high-loss":[33,101,126],"tokens":[34,39,56],"or":[35],"sequences,":[36],"these":[38],"are":[40,103,191],"essential":[41],"for":[42,75],"learning":[43,87,123],"tasks,":[45],"especially":[46,104],"those":[47],"with":[48],"poor":[49],"pretraining":[50],"coverage.":[51],"In":[52],"such":[53],"settings,":[54],"hard":[55],"should":[57],"still":[58],"contribute":[59],"learning,":[61],"so":[62],"forgetting":[63,79,152],"must":[64],"be":[65],"controlled":[66],"without":[67],"suppressing":[68],"them.":[69],"We":[70],"identify":[71],"simple":[73],"mechanism":[74],"doing":[76],"so:":[77],"per-step":[78],"is":[80],"bounded":[81],"product":[84],"of":[85,93,162],"rate":[88,124],"and":[89,128,145,175],"square":[91],"root":[92],"current":[95],"training":[96],"loss.":[97],"This":[98],"suggests":[99],"that":[100,120,188],"batches":[102,127],"prone":[105],"inducing":[107],"Motivated":[109],"observation,":[112],"we":[113],"introduce":[114],"FINCH,":[115],"loss-adaptive":[117],"learning-rate":[118,189],"schedule":[119],"reduces":[121,151],"increases":[129],"it":[130],"model":[133,197],"converges,":[134],"while":[135,157,179],"leaving":[136],"unchanged.":[140],"Across":[141],"knowledge":[142,167],"acquisition,":[143,168],"science,":[144],"low-resource":[146],"adaptation":[148],"benchmarks,":[149],"FINCH":[150,169],"93%":[154],"average":[156],"matching":[158],"standard":[163],"fine-tuning.":[164],"On":[165],"Qwen3-4B":[166],"cuts":[170],"TruthfulQA":[171],"degradation":[172],"5x":[174],"reverses":[176],"HaluEval":[177],"degradation,":[178],"better":[180],"preserving":[181],"confidence":[182],"calibration.":[183],"Overall,":[184],"our":[185],"results":[186],"show":[187],"schedules":[190],"an":[192],"effective":[193],"tool":[194],"shape":[196],"behavior":[198],"fine-tuning,":[200],"beyond":[201],"just":[202],"target-task":[203],"optimization.":[204]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-21T00:00:00"}
