{"id":"https://openalex.org/W7138913313","doi":"https://doi.org/10.48550/arxiv.2603.16734","title":"Differential Harm Propensity in Personalized LLM Agents: The Curious Case of Mental Health Disclosure","display_name":"Differential Harm Propensity in Personalized LLM Agents: The Curious Case of Mental Health Disclosure","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7138913313","doi":"https://doi.org/10.48550/arxiv.2603.16734"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.16734","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16734","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.16734","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000390546","display_name":"Caglar Yildirim","orcid":"https://orcid.org/0000-0002-0346-9299"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yildirim, Caglar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":["https://openalex.org/A5000390546"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.2815000116825104,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.2815000116825104,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.24089999496936798,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.13279999792575836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/harm","display_name":"Harm","score":0.801800012588501},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6657999753952026},{"id":"https://openalex.org/keywords/mental-health","display_name":"Mental health","score":0.6000000238418579},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.5852000117301941},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.542900025844574},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.46959999203681946},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4239000082015991}],"concepts":[{"id":"https://openalex.org/C2777363581","wikidata":"https://www.wikidata.org/wiki/Q15098235","display_name":"Harm","level":2,"score":0.801800012588501},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6657999753952026},{"id":"https://openalex.org/C134362201","wikidata":"https://www.wikidata.org/wiki/Q317309","display_name":"Mental health","level":2,"score":0.6000000238418579},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.5852000117301941},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.542900025844574},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.5013999938964844},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.46959999203681946},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.4404999911785126},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4239000082015991},{"id":"https://openalex.org/C75630572","wikidata":"https://www.wikidata.org/wiki/Q538904","display_name":"Applied psychology","level":1,"score":0.35600000619888306},{"id":"https://openalex.org/C3019921246","wikidata":"https://www.wikidata.org/wiki/Q104853310","display_name":"Differential effects","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.33090001344680786},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.32330000400543213},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C118552586","wikidata":"https://www.wikidata.org/wiki/Q7867","display_name":"Psychiatry","level":1,"score":0.303600013256073},{"id":"https://openalex.org/C3017944768","wikidata":"https://www.wikidata.org/wiki/Q1450463","display_name":"Poison control","level":2,"score":0.30090001225471497},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.29829999804496765},{"id":"https://openalex.org/C2909974400","wikidata":"https://www.wikidata.org/wiki/Q2533016","display_name":"Aggravating Factor","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C70410870","wikidata":"https://www.wikidata.org/wiki/Q199906","display_name":"Clinical psychology","level":1,"score":0.2709999978542328},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.26829999685287476},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.26260000467300415},{"id":"https://openalex.org/C2779328685","wikidata":"https://www.wikidata.org/wiki/Q1475557","display_name":"Patient safety","level":3,"score":0.25540000200271606},{"id":"https://openalex.org/C17923572","wikidata":"https://www.wikidata.org/wiki/Q7250160","display_name":"Propensity score matching","level":2,"score":0.2522999942302704},{"id":"https://openalex.org/C166735990","wikidata":"https://www.wikidata.org/wiki/Q1750812","display_name":"Human factors and ergonomics","level":3,"score":0.25220000743865967}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.16734","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16734","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.16734","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16734","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7660456299781799}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,113],"(LLMs)":[3],"are":[4,168],"increasingly":[5],"deployed":[6],"as":[7,222],"tool-using":[8],"agents,":[9],"shifting":[10],"safety":[11,33],"concerns":[12],"from":[13],"harmful":[14,18,56,104,128,139],"text":[15],"generation":[16],"to":[17,199],"task":[19,105],"completion.":[20,140],"Deployed":[21],"systems":[22],"often":[23,158],"condition":[24],"on":[25,62,72,183],"user":[26],"profiles":[27],"or":[28,205],"persistent":[29],"memory,":[30],"yet":[31],"agent":[32],"evaluations":[34,244],"typically":[35],"ignore":[36],"personalization":[37,87,219],"signals.":[38],"To":[39],"address":[40],"this":[41],"gap,":[42],"we":[43,66],"investigated":[44],"how":[45],"mental":[46,155],"health":[47,92,156],"disclosure,":[48],"a":[49,96,124,142,187,223],"sensitive":[50],"and":[51,69,94,149,170,202,245],"realistic":[52],"user-context":[53,86,251],"cue,":[54],"affects":[55],"behavior":[57],"in":[58,162,227],"agentic":[59,228],"settings.":[60],"Building":[61],"the":[63,163,178,207,240],"AgentHarm":[64],"benchmark,":[65],"evaluated":[67],"frontier":[68,111],"open-source":[70],"LLMs":[71],"multi-step":[73],"malicious":[74],"tasks":[75],"(and":[76],"their":[77],"benign":[78,184,200],"counterparts)":[79],"under":[80,235],"controlled":[81],"prompt":[82],"conditions":[83,201],"that":[84,103,218,247],"vary":[85],"(no":[88],"bio,":[89],"bio-only,":[90],"bio+mental":[91],"disclosure)":[93],"include":[95],"lightweight":[97],"jailbreak":[98,193],"injection.":[99],"Our":[100],"results":[101,216],"reveal":[102],"completion":[106],"is":[107,233],"non-trivial":[108],"across":[109,250],"models:":[110],"lab":[112],"(e.g.,":[114],"GPT":[115],"5.2,":[116],"Claude":[117],"Sonnet":[118],"4.5,":[119],"Gemini":[120],"3-Pro)":[121],"still":[122],"complete":[123],"measurable":[125],"fraction":[126],"of":[127],"tasks,":[129,185],"while":[130],"an":[131,153],"open":[132],"model":[133],"(DeepSeek":[134],"3.2)":[135],"exhibits":[136],"substantially":[137],"higher":[138],"Adding":[141,152],"bio-only":[143],"context":[144],"generally":[145],"reduces":[146],"harm":[147,197],"scores":[148],"increases":[150],"refusals.":[151],"explicit":[154],"disclosure":[157],"shifts":[159],"outcomes":[160],"further":[161],"same":[164],"direction,":[165],"though":[166],"effects":[167],"modest":[169],"not":[171],"uniformly":[172],"reliable":[173],"after":[174],"multiple-testing":[175],"correction.":[176],"Importantly,":[177],"refusal":[179],"increase":[180],"also":[181],"appears":[182],"indicating":[186],"safety--utility":[188],"trade-off":[189],"via":[190],"over-refusal.":[191],"Finally,":[192],"prompting":[194],"sharply":[195],"elevates":[196],"relative":[198],"can":[203,220],"weaken":[204],"override":[206],"protective":[208,225],"shift":[209],"induced":[210],"by":[211],"personalization.":[212],"Taken":[213],"together,":[214],"our":[215],"indicate":[217],"act":[221],"weak":[224],"factor":[226],"misuse":[229],"settings,":[230],"but":[231],"it":[232],"fragile":[234],"minimal":[236],"adversarial":[237],"pressure,":[238],"highlighting":[239],"need":[241],"for":[242],"personalization-aware":[243],"safeguards":[246],"remain":[248],"robust":[249],"conditions.":[252]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-03-20T00:00:00"}
