{"id":"https://openalex.org/W7126273321","doi":"https://doi.org/10.48550/arxiv.2601.21702","title":"Beyond Forgetting: Machine Unlearning Elicits Controllable Side Behaviors and Capabilities","display_name":"Beyond Forgetting: Machine Unlearning Elicits Controllable Side Behaviors and Capabilities","publication_year":2026,"publication_date":"2026-01-29","ids":{"openalex":"https://openalex.org/W7126273321","doi":"https://doi.org/10.48550/arxiv.2601.21702"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.21702","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.21702","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.21702","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124398099","display_name":"Tien Dang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dang, Tien","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121589887","display_name":"The-Hai Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, The-Hai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124428453","display_name":"Dinh Mai Phuong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Phuong, Dinh Mai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124338420","display_name":"Nguy\u1ec5n V\u0169 Nguy\u00ean Ph\u01b0\u01a1ng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Phuong, Nguyen Minh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066310816","display_name":"Hoang Thanh-Tung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thanh-Tung, Hoang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121568751","display_name":"Le-Minh Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Le-Minh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5086772095","display_name":"Naoya Inoue","orcid":"https://orcid.org/0000-0002-1521-8833"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Inoue, Naoya","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5124398099"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.4790000021457672,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.4790000021457672,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.11169999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.07119999825954437,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.761900007724762},{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.7389000058174133},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.47920000553131104},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4494999945163727},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.4406999945640564},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.40310001373291016}],"concepts":[{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.761900007724762},{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.7389000058174133},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5706999897956848},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5296000242233276},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.47920000553131104},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4494999945163727},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.4406999945640564},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.40310001373291016},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.352400004863739},{"id":"https://openalex.org/C50335755","wikidata":"https://www.wikidata.org/wiki/Q483247","display_name":"Phenomenon","level":2,"score":0.33399999141693115},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3116999864578247},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.29190000891685486},{"id":"https://openalex.org/C6802819","wikidata":"https://www.wikidata.org/wiki/Q1072174","display_name":"Linear system","level":2,"score":0.2603999972343445}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.21702","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.21702","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.21702","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.21702","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"consider":[1],"representation":[2,51,61,69],"misdirection":[3],"(RM),":[4],"a":[5,59,64,111,148,154],"class":[6],"of":[7,22,30,48,114],"LLM":[8],"unlearning":[9,91],"methods":[10],"that":[11,18,140,156,163],"achieves":[12],"forgetting":[13],"by":[14],"manipulating":[15],"the":[16,28,46,49,67,79,102],"forget-representations,":[17],"is,":[19],"latent":[20],"representations":[21],"forget":[23],"samples.":[24],"Despite":[25],"being":[26],"important,":[27],"roles":[29],"target":[31],"vectors":[32],"used":[33],"in":[34],"RM,":[35],"however,":[36],"remain":[37],"underexplored.":[38],"Here,":[39],"we":[40,85],"approach":[41],"and":[42,96,125,127,167],"revisit":[43],"RM":[44],"through":[45],"lens":[47],"linear":[50,68,72],"hypothesis.":[52],"Specifically,":[53],"if":[54,151],"one":[55],"can":[56,157],"somehow":[57],"identify":[58],"one-dimensional":[60],"corresponding":[62,100],"to":[63,101],"high-level":[65,103],"concept,":[66],"hypothesis":[70,106],"enables":[71],"operations":[73],"on":[74],"this":[75,83,141],"concept":[76],"vector":[77],"within":[78],"forget-representation":[80],"space.":[81],"Under":[82],"view,":[84],"hypothesize":[86],"that,":[87],"beyond":[88],"forgetting,":[89],"machine":[90],"elicits":[92],"controllable":[93,168],"side":[94,98],"behaviors":[95],"stronger":[97,165],"capabilities":[99,166],"concept.":[104],"Our":[105,137],"is":[107],"empirically":[108],"validated":[109],"across":[110],"wide":[112],"range":[113],"tasks,":[115],"including":[116],"behavioral":[117],"control":[118],"(e.g.,":[119,130],"controlling":[120],"unlearned":[121,132],"models'":[122,133],"truth,":[123],"sentiment,":[124],"refusal)":[126],"capability":[128],"enhancement":[129],"improving":[131],"in-context":[134],"learning":[135],"capability).":[136],"findings":[138],"reveal":[139],"fairly":[142],"attractive":[143],"phenomenon":[144],"could":[145],"be":[146,158],"either":[147],"hidden":[149],"risk":[150],"misused":[152],"or":[153],"mechanism":[155],"harnessed":[159],"for":[160],"developing":[161],"models":[162],"require":[164],"behaviors.":[169]},"counts_by_year":[],"updated_date":"2026-02-01T03:38:14.988550","created_date":"2026-02-01T00:00:00"}
