{"id":"https://openalex.org/W7134235147","doi":"https://doi.org/10.48550/arxiv.2603.05618","title":"Safer Reasoning Traces: Measuring and Mitigating Chain-of-Thought Leakage in LLMs","display_name":"Safer Reasoning Traces: Measuring and Mitigating Chain-of-Thought Leakage in LLMs","publication_year":2026,"publication_date":"2026-03-05","ids":{"openalex":"https://openalex.org/W7134235147","doi":"https://doi.org/10.48550/arxiv.2603.05618"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.05618","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128439309","display_name":"Patrick Ahrend","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahrend, Patrick","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128613719","display_name":"Tobias Eder","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eder, Tobias","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033496544","display_name":"Xiyang I.A. Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128547142","display_name":"Zhiyi Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pan, Zhiyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5004398345","display_name":"Georg Groh","orcid":"https://orcid.org/0000-0002-5942-2297"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Groh, Georg","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.1137000024318695,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.1137000024318695,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.10949999839067459,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.10010000318288803,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leakage","display_name":"Leakage (economics)","score":0.7864000201225281},{"id":"https://openalex.org/keywords/safer","display_name":"SAFER","score":0.765500009059906},{"id":"https://openalex.org/keywords/risk-assessment","display_name":"Risk assessment","score":0.492000013589859},{"id":"https://openalex.org/keywords/gatekeeping","display_name":"Gatekeeping","score":0.4595000147819519},{"id":"https://openalex.org/keywords/information-leakage","display_name":"Information leakage","score":0.43540000915527344},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.3549000024795532}],"concepts":[{"id":"https://openalex.org/C2777042071","wikidata":"https://www.wikidata.org/wiki/Q6509304","display_name":"Leakage (economics)","level":2,"score":0.7864000201225281},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.765500009059906},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5001000165939331},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.492000013589859},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.4625999927520752},{"id":"https://openalex.org/C73380752","wikidata":"https://www.wikidata.org/wiki/Q609087","display_name":"Gatekeeping","level":2,"score":0.4595000147819519},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.4478999972343445},{"id":"https://openalex.org/C2779201187","wikidata":"https://www.wikidata.org/wiki/Q2775060","display_name":"Information leakage","level":2,"score":0.43540000915527344},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.3549000024795532},{"id":"https://openalex.org/C2779662715","wikidata":"https://www.wikidata.org/wiki/Q19596443","display_name":"Conditional logistic regression","level":3,"score":0.3061000108718872},{"id":"https://openalex.org/C100243477","wikidata":"https://www.wikidata.org/wiki/Q12002092","display_name":"Welfare","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.2752000093460083},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C32896092","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Risk management","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.2549999952316284}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.05618","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.05618","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.05618","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.05618","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Chain-of-Thought":[0],"(CoT)":[1],"prompting":[2],"improves":[3],"LLM":[4],"reasoning":[5,21,110],"but":[6],"can":[7,112],"increase":[8],"privacy":[9],"risk":[10,87,168],"by":[11],"resurfacing":[12],"personally":[13],"identifiable":[14],"information":[15],"(PII)":[16],"from":[17],"the":[18,30,67,109,120],"prompt":[19],"into":[20],"traces":[22,60],"and":[23,71,75,100,106,142,149,167],"outputs,":[24],"even":[25],"under":[26,169],"policies":[27,163],"that":[28,47,91,101,164],"instruct":[29],"model":[31,77],"not":[32],"to":[33],"restate":[34],"PII.":[35],"We":[36,89,123],"study":[37],"such":[38],"direct,":[39],"inference-time":[40,127],"PII":[41,57,82],"leakage":[42,50,61,102,117],"using":[43,145],"a":[44,64,80,85,129,132,138,170],"model-agnostic":[45],"framework":[46],"(i)":[48],"defines":[49],"as":[51,63],"risk-weighted,":[52],"token-level":[53],"events":[54],"across":[55,155],"11":[56],"types,":[58],"(ii)":[59],"curves":[62],"function":[65],"of":[66],"allowed":[68],"CoT":[69,92],"budget,":[70],"(iii)":[72],"compares":[73],"open-":[74],"closed-source":[76],"families":[78],"on":[79,119],"structured":[81],"dataset":[83],"with":[84],"hierarchical":[86],"taxonomy.":[88],"find":[90],"consistently":[93],"elevates":[94],"leakage,":[95],"especially":[96],"for":[97],"high-risk":[98],"categories,":[99],"is":[103],"strongly":[104],"family-":[105],"budget-dependent.":[107],"Increasing":[108],"budget":[111],"either":[113],"amplify":[114],"or":[115,157],"attenuate":[116],"depending":[118],"base":[121],"model.":[122],"then":[124],"benchmark":[125],"lightweight":[126],"gatekeepers:":[128],"rule-based":[130],"detector,":[131],"TF-IDF":[133],"+":[134],"logistic":[135],"regression":[136],"classifier,":[137],"GLiNER-based":[139],"NER":[140],"model,":[141],"an":[143],"LLM-as-judge,":[144],"risk-weighted":[146],"F1,":[147],"Macro-F1,":[148],"recall.":[150],"No":[151],"single":[152],"method":[153],"dominates":[154],"models":[156],"budgets,":[158],"motivating":[159],"hybrid,":[160],"style-adaptive":[161],"gatekeeping":[162],"balance":[165],"utility":[166],"common,":[171],"reproducible":[172],"protocol.":[173]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-10T00:00:00"}
