{"id":"https://openalex.org/W7133307743","doi":"https://doi.org/10.48550/arxiv.2603.00061","title":"The Hidden Costs of Domain Fine-Tuning: Pii-Bearing Data Degrades Safety and Increases Leakage","display_name":"The Hidden Costs of Domain Fine-Tuning: Pii-Bearing Data Degrades Safety and Increases Leakage","publication_year":2026,"publication_date":"2026-02-10","ids":{"openalex":"https://openalex.org/W7133307743","doi":"https://doi.org/10.48550/arxiv.2603.00061"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.00061","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00061","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.00061","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5097929011","display_name":"Mr. Jayesh Choudhari","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Choudhari, Jayesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127906854","display_name":"Piyush Kumar Singh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singh, Piyush Kumar","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5097929011"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2386000007390976,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2386000007390976,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.10490000247955322,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.08380000293254852,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leakage","display_name":"Leakage (economics)","score":0.6643000245094299},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.6313999891281128},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.5045999884605408},{"id":"https://openalex.org/keywords/leak","display_name":"Leak","score":0.4596000015735626},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.45879998803138733},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.43560001254081726},{"id":"https://openalex.org/keywords/information-leakage","display_name":"Information leakage","score":0.40059998631477356},{"id":"https://openalex.org/keywords/disconnection","display_name":"Disconnection","score":0.34459999203681946},{"id":"https://openalex.org/keywords/domain-model","display_name":"Domain model","score":0.3443000018596649}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6757000088691711},{"id":"https://openalex.org/C2777042071","wikidata":"https://www.wikidata.org/wiki/Q6509304","display_name":"Leakage (economics)","level":2,"score":0.6643000245094299},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6313999891281128},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.5045999884605408},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.4713999927043915},{"id":"https://openalex.org/C2780378346","wikidata":"https://www.wikidata.org/wiki/Q1349983","display_name":"Leak","level":2,"score":0.4596000015735626},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.45879998803138733},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.43560001254081726},{"id":"https://openalex.org/C2779201187","wikidata":"https://www.wikidata.org/wiki/Q2775060","display_name":"Information leakage","level":2,"score":0.40059998631477356},{"id":"https://openalex.org/C2780371621","wikidata":"https://www.wikidata.org/wiki/Q3709910","display_name":"Disconnection","level":2,"score":0.34459999203681946},{"id":"https://openalex.org/C92548554","wikidata":"https://www.wikidata.org/wiki/Q2262868","display_name":"Domain model","level":3,"score":0.3443000018596649},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.3400000035762787},{"id":"https://openalex.org/C2781460075","wikidata":"https://www.wikidata.org/wiki/Q1399332","display_name":"Compliance (psychology)","level":2,"score":0.3359000086784363},{"id":"https://openalex.org/C103824480","wikidata":"https://www.wikidata.org/wiki/Q185889","display_name":"Time domain","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3285999894142151},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.3264000117778778},{"id":"https://openalex.org/C2988987868","wikidata":"https://www.wikidata.org/wiki/Q32635","display_name":"Domain name","level":3,"score":0.3240000009536743},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C50965678","wikidata":"https://www.wikidata.org/wiki/Q2724302","display_name":"Abnormality","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.31279999017715454},{"id":"https://openalex.org/C512654426","wikidata":"https://www.wikidata.org/wiki/Q19652","display_name":"Public domain","level":2,"score":0.2989000082015991},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.28369998931884766},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C2779679103","wikidata":"https://www.wikidata.org/wiki/Q5251805","display_name":"Degradation (telecommunications)","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2712000012397766},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.26980000734329224},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C198352243","wikidata":"https://www.wikidata.org/wiki/Q37105","display_name":"Line (geometry)","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2590999901294708},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.25609999895095825},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2547000050544739},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.00061","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00061","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.00061","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00061","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7284228801727295,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Domain":[0],"fine-tuning":[1,90,145,170],"is":[2,54,166],"a":[3,33,75,137,147],"common":[4],"path":[5],"to":[6,104,183],"deploy":[7],"small":[8],"instruction-tuned":[9],"language":[10],"models":[11,102,182,205],"as":[12],"customer-support":[13],"assistants,":[14],"yet":[15],"its":[16],"effects":[17],"on":[18,111,157],"safety-aligned":[19],"behavior":[20,60,98,135],"and":[21,39,69,89,96,122,132],"privacy":[22,71],"are":[23,44],"not":[24,228],"well":[25],"understood.":[26],"In":[27],"real":[28,113],"deployments,":[29],"such":[30],"assistants":[31],"receive":[32],"mixture":[34],"of":[35,79,87,139,195],"benign":[36],"in-domain":[37],"requests":[38],"out-of-domain":[40,97,134],"user":[41],"queries":[42],"that":[43,63],"emotional,":[45],"philosophical,":[46],"or":[47],"adversarial.":[48],"Even":[49],"when":[50,164,211],"the":[51,160,169],"target":[52],"domain":[53,144,208],"benign,":[55],"specialization":[56],"may":[57],"shift":[58,150],"model":[59,110],"in":[61,99,168,180,218],"ways":[62],"weaken":[64],"refusal,":[65],"increase":[66],"harmful":[67,155,196],"compliance,":[68],"induce":[70],"leakage.":[72,200],"We":[73,107,125],"present":[74,167],"controlled":[76],"empirical":[77],"study":[78],"how":[80],"training":[81],"data":[82],"composition":[83],"(presence":[84],"vs.\\":[85],"removal":[86],"PII)":[88],"configuration":[91],"(role-swapping":[92],"(RS))":[93],"shape":[94],"safety":[95,127],"open-source":[100],"chat":[101],"up":[103],"8B":[105],"parameters.":[106],"fine-tune":[108],"each":[109],"5{,}000":[112],"booking-support":[114],"message":[115],"pairs":[116],"under":[117],"three":[118],"settings:":[119],"\\textsc{NoPII-NoRS},":[120],"\\textsc{PII-NoRS},":[121],"\\textsc{PII-RS}":[123],"(role-swapped).":[124],"evaluate":[126],"using":[128,136],"\\textsc{SORRY-Bench}~\\cite{xie2024sorry}":[129],"adversarial":[130],"prompts":[131],"assess":[133],"suite":[138],"philosophical":[140,202],"questions~\\cite{betley2025emergent}.":[141],"Across":[142],"models,":[143],"causes":[146],"large":[148],"distributional":[149],"from":[151,178],"high-quality":[152],"refusals":[153],"toward":[154],"compliance":[156],"\\textsc{SORRY-Bench},":[158],"with":[159,198,213],"most":[161],"severe":[162],"degradation":[163],"PII":[165,199,224],"data.":[171],"For":[172],"example,":[173],"macro-averaged":[174],"strong":[175],"refusal":[176,231],"drops":[177],"$42.6\\%$":[179],"base":[181],"single":[184],"digits":[185],"after":[186],"fine-tuning,":[187],"while":[188],"PII-bearing":[189],"runs":[190],"additionally":[191],"exhibit":[192,207],"double-digit":[193],"rates":[194],"responses":[197],"On":[201],"queries,":[203],"fine-tuned":[204],"frequently":[206],"anchoring":[209],"and,":[210],"trained":[212],"PII,":[214],"leak":[215],"sensitive":[216],"identifiers":[217],"irrelevant":[219],"contexts.":[220],"Role-swapping":[221],"partially":[222],"mitigates":[223],"leakage":[225],"but":[226],"does":[227],"reliably":[229],"restore":[230],"behavior.":[232]},"counts_by_year":[],"updated_date":"2026-03-04T07:09:34.246503","created_date":"2026-03-04T00:00:00"}
