{"id":"https://openalex.org/W7138337562","doi":"https://doi.org/10.48550/arxiv.2603.14517","title":"Learning to Forget: Sleep-Inspired Memory Consolidation for Resolving Proactive Interference in Large Language Models","display_name":"Learning to Forget: Sleep-Inspired Memory Consolidation for Resolving Proactive Interference in Large Language Models","publication_year":2026,"publication_date":"2026-03-15","ids":{"openalex":"https://openalex.org/W7138337562","doi":"https://doi.org/10.48550/arxiv.2603.14517"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.14517","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.14517","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129740359","display_name":"Ying Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xie, Ying","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5129740359"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10985","display_name":"Sleep and Wakefulness Research","score":0.42149999737739563,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10985","display_name":"Sleep and Wakefulness Research","score":0.42149999737739563,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.10270000249147415,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07090000063180923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.5498999953269958},{"id":"https://openalex.org/keywords/memory-consolidation","display_name":"Memory consolidation","score":0.5174000263214111},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4702000021934509},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.46630001068115234},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.40630000829696655},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.3849000036716461},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.3725000023841858},{"id":"https://openalex.org/keywords/interference","display_name":"Interference (communication)","score":0.36820000410079956},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.36340001225471497}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7529000043869019},{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.5498999953269958},{"id":"https://openalex.org/C48455012","wikidata":"https://www.wikidata.org/wiki/Q2892593","display_name":"Memory consolidation","level":3,"score":0.5174000263214111},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4867999851703644},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4702000021934509},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.46630001068115234},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.40630000829696655},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3849000036716461},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.3725000023841858},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.36820000410079956},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.36340001225471497},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.3614000082015991},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.34880000352859497},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33629998564720154},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.33070001006126404},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.31459999084472656},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.3140000104904175},{"id":"https://openalex.org/C102392041","wikidata":"https://www.wikidata.org/wiki/Q592860","display_name":"Sliding window protocol","level":3,"score":0.3140000104904175},{"id":"https://openalex.org/C2775841894","wikidata":"https://www.wikidata.org/wiki/Q4683692","display_name":"Sleep (system call)","level":2,"score":0.3061000108718872},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2854999899864197},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.28049999475479126},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.2786000072956085},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.266400009393692},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.26260000467300415},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.2619999945163727},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C2776014549","wikidata":"https://www.wikidata.org/wiki/Q3050847","display_name":"Consolidation (business)","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C27853696","wikidata":"https://www.wikidata.org/wiki/Q3480151","display_name":"Interference theory","level":4,"score":0.25940001010894775},{"id":"https://openalex.org/C2776544517","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Unexpected events","level":2,"score":0.25609999895095825},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.14517","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.14517","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,145],"models":[2],"(LLMs)":[3],"suffer":[4],"from":[5,166],"proactive":[6],"interference":[7,21,164],"(PI):":[8],"outdated":[9],"information":[10],"in":[11,128],"the":[12,76,148,155,163],"context":[13,36],"window":[14],"disrupts":[15],"retrieval":[16,23,153,184],"of":[17,35],"current":[18],"values.":[19],"This":[20],"degrades":[22],"accuracy":[24,185],"log-linearly":[25],"as":[26],"stale":[27,107],"associations":[28],"accumulate,":[29],"a":[30,62,71,85,97,112,139,174],"bottleneck":[31],"that":[32,66,115,220],"persists":[33],"regardless":[34],"length":[37],"and":[38,56,110,151,190,207],"resists":[39],"prompt-engineering":[40],"mitigations.":[41],"Biological":[42],"brains":[43],"resolve":[44],"an":[45,133,217],"analogous":[46],"challenge":[47],"through":[48],"sleep-dependent":[49],"memory":[50],"consolidation:":[51],"synaptic":[52],"downscaling,":[53],"selective":[54],"replay,":[55],"targeted":[57],"forgetting.":[58],"We":[59,137],"propose":[60],"SleepGate,":[61],"biologically":[63],"inspired":[64],"framework":[65,215],"augments":[67],"transformer-based":[68],"LLMs":[69],"with":[70,173],"learned":[72],"sleep":[73,129,156],"cycle":[74],"over":[75],"key-value":[77],"(KV)":[78],"cache.":[79],"SleepGate":[80,161,181],"introduces":[81],"three":[82],"mechanisms:":[83],"(1)":[84],"conflict-aware":[86],"temporal":[87],"tagger":[88],"detecting":[89],"when":[90],"new":[91],"entries":[92,118],"supersede":[93],"old":[94],"ones;":[95],"(2)":[96],"lightweight":[98],"forgetting":[99],"gate":[100],"trained":[101],"to":[102,168],"selectively":[103],"evict":[104],"or":[105],"compress":[106],"cache":[108],"entries;":[109],"(3)":[111],"consolidation":[113],"module":[114],"merges":[116],"surviving":[117],"into":[119],"compact":[120],"summaries.":[121],"These":[122],"components":[123],"activate":[124],"periodically":[125],"during":[126,147,154],"inference":[127],"micro-cycles,":[130],"governed":[131],"by":[132],"adaptive":[134],"entropy-based":[135],"trigger.":[136],"formalize":[138],"dual-phase":[140],"training":[141],"objective":[142],"jointly":[143],"optimizing":[144],"modeling":[146],"wake":[149],"phase":[150],"post-consolidation":[152],"phase.":[157],"Theoretical":[158],"analysis":[159],"shows":[160],"reduces":[162],"horizon":[165],"O(n)":[167],"O(log":[169],"n).":[170],"In":[171],"experiments":[172],"small-scale":[175],"transformer":[176],"(4":[177],"layers,":[178],"793K":[179],"parameters),":[180],"achieves":[182],"99.5%":[183],"at":[186,192],"PI":[187],"depth":[188,193],"5":[189],"97.0%":[191],"10,":[194],"while":[195],"all":[196],"five":[197],"baselines":[198],"--":[199,210],"full":[200],"KV":[201],"cache,":[202],"sliding":[203],"window,":[204],"H2O,":[205],"StreamingLLM,":[206],"decay-only":[208],"ablation":[209],"remain":[211],"below":[212],"18%.":[213],"Our":[214],"offers":[216],"architecture-level":[218],"solution":[219],"prompt":[221],"engineering":[222],"cannot":[223],"address.":[224]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
