{"id":"https://openalex.org/W7138333215","doi":"https://doi.org/10.1609/aaai.v40i38.40530","title":"Learning from the Undesirable: Robust Adaptation of Language Models Without Forgetting","display_name":"Learning from the Undesirable: Robust Adaptation of Language Models Without Forgetting","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138333215","doi":"https://doi.org/10.1609/aaai.v40i38.40530"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i38.40530","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40530","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i38.40530","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129666522","display_name":"Yunhun Nam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yunhun Nam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129653907","display_name":"Jaehyung Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jaehyung Kim","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129707700","display_name":"Jongheon Jeong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jongheon Jeong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.46961064,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"38","first_page":"32537","last_page":"32545"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3043999969959259,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3043999969959259,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.26460000872612,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07169999927282333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.8514999747276306},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.6802999973297119},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.6571999788284302},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5830000042915344},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5454999804496765},{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.462799996137619},{"id":"https://openalex.org/keywords/decorrelation","display_name":"Decorrelation","score":0.4002000093460083},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.3626999855041504},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.3596999943256378}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.8514999747276306},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7433000206947327},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.6802999973297119},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.6571999788284302},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5830000042915344},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5454999804496765},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5070000290870667},{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.462799996137619},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4068000018596649},{"id":"https://openalex.org/C177860922","wikidata":"https://www.wikidata.org/wiki/Q788608","display_name":"Decorrelation","level":2,"score":0.4002000093460083},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.3626999855041504},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.3596999943256378},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32839998602867126},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3163999915122986},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3086000084877014},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.27309998869895935},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C5465570","wikidata":"https://www.wikidata.org/wiki/Q5326898","display_name":"Early stopping","level":3,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i38.40530","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40530","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i38.40530","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40530","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6471523642539978,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Language":[0],"models":[1],"(LMs)":[2],"are":[3,99],"often":[4],"adapted":[5],"through":[6,146],"supervised":[7],"fine-tuning":[8,23,82,93],"(SFT)":[9],"to":[10,29,35,39,49,77,90,95,101,193,205,216,230],"specialize":[11],"their":[12],"capabilities":[13,54],"for":[14,75],"downstream":[15,161],"tasks.":[16,210],"However,":[17],"in":[18,226],"typical":[19],"scenarios":[20],"where":[21,200],"the":[22,45,92,111,133,197,201],"data":[24,144],"is":[25],"limited,":[26],"e.g.,":[27,105,219],"compared":[28,192,229],"pre-training,":[30],"SFT":[31,76,195],"can":[32],"lead":[33],"LMs":[34,83],"overfit,":[36],"causing":[37],"them":[38],"rely":[40],"on":[41,158,189,196,208],"spurious":[42],"patterns":[43],"within":[44],"target":[46],"task":[47],"or":[48],"compromise":[50],"other":[51],"broadly":[52],"useful":[53],"as":[55,167],"a":[56,69,121,185,221],"side":[57],"effect":[58],"of":[59,124,132],"narrow":[60],"specialization.":[61],"In":[62],"this":[63,117],"paper,":[64],"we":[65,88,119],"propose":[66,120],"Learning-from-the-Undesirable":[67],"(LfU),":[68],"simple":[70],"yet":[71],"effective":[72,169],"regularization":[73,126],"scheme":[74],"mitigate":[78],"overfitting":[79],"issues":[80],"when":[81],"with":[84,135],"limited":[85,154],"data.":[86,155],"Specifically,":[87],"aim":[89],"regularize":[91],"process":[94],"favor":[96],"solutions":[97],"that":[98,109,127,164,171],"resilient":[100],"\u201cundesirable\u201d":[102],"model":[103,112,134],"updates,":[104,148],"gradient":[106],"ascent":[107],"steps":[108],"steer":[110],"toward":[113],"undesirable":[114,139,147],"behaviors.":[115],"To":[116],"end,":[118],"novel":[122],"form":[123],"consistency":[125],"directly":[128],"aligns":[129],"internal":[130],"representations":[131],"those":[136,209],"after":[137],"an":[138,168],"update.":[140],"By":[141],"leveraging":[142],"representation-level":[143],"augmentation":[145],"LfU":[149,165,183,212],"effectively":[150],"promotes":[151],"generalization":[152],"under":[153],"Our":[156],"experiments":[157],"diverse":[159],"LM":[160,181],"tasks":[162,191],"show":[163],"serves":[166],"prior":[170],"enhances":[172],"adaptability":[173],"while":[174],"preserving":[175],"pretrained":[176],"knowledge.":[177],"For":[178],"example,":[179],"our":[180],"from":[182],"achieves":[184],"16.8%":[186],"average":[187],"improvement":[188],"math":[190],"vanilla":[194],"same":[198],"dataset,":[199],"latter":[202],"even":[203],"leads":[204],"degraded":[206],"performance":[207],"Furthermore,":[211],"exhibits":[213],"improved":[214],"robustness":[215],"prompt":[217],"variations,":[218],"yielding":[220],"92.1%":[222],"lower":[223],"standard":[224],"deviation":[225],"output":[227],"performances":[228],"SFT,":[231],"highlighting":[232],"its":[233],"versatile":[234],"effects.":[235]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-18T00:00:00"}
