{"id":"https://openalex.org/W7131403589","doi":"https://doi.org/10.48550/arxiv.2602.20162","title":"Talking to Yourself: Defying Forgetting in Large Language Models","display_name":"Talking to Yourself: Defying Forgetting in Large Language Models","publication_year":2026,"publication_date":"2026-01-23","ids":{"openalex":"https://openalex.org/W7131403589","doi":"https://doi.org/10.48550/arxiv.2602.20162"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.20162","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20162","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.20162","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126813872","display_name":"Yutao Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sun, Yutao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126795000","display_name":"Mingshuai Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Mingshuai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126806347","display_name":"Tiancheng Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Tiancheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085530168","display_name":"Phillip Miao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miao, Phillip","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013896311","display_name":"Zilun Zhang","orcid":"https://orcid.org/0009-0008-5961-5970"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zilun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058869386","display_name":"Haozhan Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Haozhan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126846248","display_name":"Ruizhe Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Ruizhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126807725","display_name":"Jianwei Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Jianwei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5126813872"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3319999873638153,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3319999873638153,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.30219998955726624,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.13050000369548798,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.9376000165939331},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5849999785423279},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.522599995136261},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.41359999775886536},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.40130001306533813},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.34610000252723694},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.33379998803138733}],"concepts":[{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.9376000165939331},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7024000287055969},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5849999785423279},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.522599995136261},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5040000081062317},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.41359999775886536},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.40130001306533813},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38040000200271606},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.34610000252723694},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.33379998803138733},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.3319999873638153},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.32760000228881836},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3179999887943268},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.3077000081539154},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.30480000376701355},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.29580000042915344},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C166052673","wikidata":"https://www.wikidata.org/wiki/Q83021","display_name":"Empirical evidence","level":2,"score":0.25040000677108765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.20162","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20162","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.20162","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20162","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Catastrophic":[0],"forgetting":[1,68,117],"remains":[2],"a":[3,27,112,146],"major":[4],"challenge":[5],"when":[6],"fine-tuning":[7],"large":[8],"language":[9],"models":[10],"(LLMs)":[11],"on":[12],"narrow,":[13],"task-specific":[14],"data,":[15],"often":[16],"degrading":[17],"their":[18],"general":[19],"knowledge":[20],"and":[21,40,85,100,125,148],"reasoning":[22],"abilities.":[23],"We":[24],"propose":[25],"SA-SFT,":[26],"lightweight":[28],"self-augmentation":[29,144],"routine":[30],"in":[31,90],"which":[32],"an":[33,132],"LLM":[34,153],"generates":[35],"self-dialogues":[36],"prior":[37],"to":[38,81,135],"fine-tuning,":[39],"the":[41,82,87],"resulting":[42],"self-authored":[43],"data":[44,49,60,102,130],"are":[45],"mixed":[46],"with":[47],"task":[48],"without":[50,155],"modifying":[51],"optimization":[52],"or":[53,61],"training":[54],"schedules.":[55],"Despite":[56],"requiring":[57],"no":[58],"external":[59,101],"additional":[62],"tuning,":[63],"SA-SFT":[64],"consistently":[65],"mitigates":[66],"catastrophic":[67,157],"while":[69],"improving":[70],"in-domain":[71],"performance.":[72],"Across":[73],"50":[74],"evaluation":[75],"scenarios,":[76],"it":[77],"maintains":[78],"performance":[79],"comparable":[80],"original":[83],"model":[84],"achieves":[86],"best":[88],"results":[89,141],"40":[91],"cases,":[92],"outperforming":[93],"common":[94],"baselines":[95],"such":[96],"as":[97],"layer":[98],"freezing":[99],"mixing.":[103],"Guided":[104],"by":[105],"these":[106],"empirical":[107],"findings,":[108],"we":[109],"further":[110],"present":[111],"theoretical":[113],"analysis":[114],"suggesting":[115],"that":[116,126,143],"can":[118],"partly":[119],"stem":[120],"from":[121],"style-induced":[122],"parameter":[123],"drift,":[124],"self-alignment":[127],"through":[128],"self-generated":[129],"provides":[131],"effective":[133,149],"means":[134],"counteract":[136],"this":[137],"effect.":[138],"Overall,":[139],"our":[140],"indicate":[142],"offers":[145],"simple":[147],"mechanism":[150],"for":[151],"robust":[152],"adaptation":[154],"incurring":[156],"forgetting.":[158]},"counts_by_year":[],"updated_date":"2026-02-26T06:34:08.959763","created_date":"2026-02-26T00:00:00"}
