{"id":"https://openalex.org/W7159575061","doi":"https://doi.org/10.48550/arxiv.2604.27063","title":"Learning to Forget: Continual Learning with Adaptive Weight Decay","display_name":"Learning to Forget: Continual Learning with Adaptive Weight Decay","publication_year":2026,"publication_date":"2026-04-29","ids":{"openalex":"https://openalex.org/W7159575061","doi":"https://doi.org/10.48550/arxiv.2604.27063"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.27063","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27063","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.27063","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101351825","display_name":"Aditya A. Ramesh","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ramesh, Aditya A.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134979362","display_name":"Alex Lewandowski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lewandowski, Alex","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128424967","display_name":"J\u00fcrgen Schmidhuber","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schmidhuber, J\u00fcrgen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101351825"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9746999740600586,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9746999740600586,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.0024999999441206455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.002300000051036477,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.8296999931335449},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.45100000500679016},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.43549999594688416},{"id":"https://openalex.org/keywords/exponential-growth","display_name":"Exponential growth","score":0.41609999537467957},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.37950000166893005},{"id":"https://openalex.org/keywords/fade","display_name":"Fade","score":0.37439998984336853},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.36320000886917114},{"id":"https://openalex.org/keywords/scalar","display_name":"Scalar (mathematics)","score":0.3492000102996826}],"concepts":[{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.8296999931335449},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5016999840736389},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.45100000500679016},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.43549999594688416},{"id":"https://openalex.org/C75235859","wikidata":"https://www.wikidata.org/wiki/Q582659","display_name":"Exponential growth","level":2,"score":0.41609999537467957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3978999853134155},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.37950000166893005},{"id":"https://openalex.org/C2778518048","wikidata":"https://www.wikidata.org/wiki/Q848346","display_name":"Fade","level":2,"score":0.37439998984336853},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3700000047683716},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.36320000886917114},{"id":"https://openalex.org/C57691317","wikidata":"https://www.wikidata.org/wiki/Q1289248","display_name":"Scalar (mathematics)","level":2,"score":0.3492000102996826},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.34769999980926514},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C180188523","wikidata":"https://www.wikidata.org/wiki/Q574576","display_name":"Exponential decay","level":2,"score":0.3154999911785126},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.2912999987602234},{"id":"https://openalex.org/C151376022","wikidata":"https://www.wikidata.org/wiki/Q168698","display_name":"Exponential function","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27900001406669617},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C125014702","wikidata":"https://www.wikidata.org/wiki/Q4680749","display_name":"Adaptive learning","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C52970973","wikidata":"https://www.wikidata.org/wiki/Q2497134","display_name":"Adaptive system","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.27063","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27063","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.27063","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27063","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Continual":[0],"learning":[1],"agents":[2],"with":[3,11],"finite":[4],"capacity":[5,28],"must":[6],"balance":[7],"acquiring":[8],"new":[9],"knowledge":[10,20,73],"retaining":[12],"the":[13,49,102,110],"old.":[14],"This":[15],"requires":[16],"controlled":[17],"forgetting":[18,59],"of":[19,113],"that":[21,120],"is":[22],"no":[23],"longer":[24],"needed,":[25],"freeing":[26],"up":[27],"to":[29,109],"learn.":[30],"Weight":[31],"decay,":[32],"viewed":[33],"as":[34],"a":[35,52],"mechanism":[36],"for":[37,101,127],"forgetting,":[38],"can":[39],"serve":[40],"this":[41,58],"role":[42],"by":[43],"gradually":[44],"discarding":[45],"information":[46],"stored":[47],"in":[48],"weights.":[50],"However,":[51],"fixed":[53,137],"scalar":[54],"weight":[55,90,138],"decay":[56,91,125,139],"drives":[57],"uniformly":[60,64],"over":[61,136],"time":[62],"and":[63,106,133,143],"across":[65,140],"all":[66],"parameters,":[67,129],"even":[68],"when":[69],"some":[70],"encode":[71],"stable":[72],"while":[74],"others":[75],"track":[76],"rapidly":[77],"changing":[78],"targets.":[79],"We":[80,98],"introduce":[81],"Forgetting":[82],"through":[83],"Adaptive":[84],"Decay":[85],"(FADE),":[86],"which":[87],"adapts":[88],"per-parameter":[89],"rates":[92,126],"online":[93,103,141],"via":[94],"approximate":[95],"meta-gradient":[96],"descent.":[97],"derive":[99],"FADE":[100,121],"linear":[104],"setting":[105],"apply":[107],"it":[108],"final":[111],"layer":[112],"neural":[114],"networks.":[115],"Our":[116],"empirical":[117],"analysis":[118],"shows":[119],"automatically":[122],"discovers":[123],"distinct":[124],"different":[128],"complements":[130],"step-size":[131],"adaptation,":[132],"consistently":[134],"improves":[135],"tracking":[142],"streaming":[144],"classification":[145],"problems.":[146]},"counts_by_year":[],"updated_date":"2026-05-02T06:10:54.344120","created_date":"2026-05-02T00:00:00"}
