{"id":"https://openalex.org/W7158299073","doi":"https://doi.org/10.48550/arxiv.2604.25840","title":"PSI-Bench: Towards Clinically Grounded and Interpretable Evaluation of Depression Patient Simulators","display_name":"PSI-Bench: Towards Clinically Grounded and Interpretable Evaluation of Depression Patient Simulators","publication_year":2026,"publication_date":"2026-04-28","ids":{"openalex":"https://openalex.org/W7158299073","doi":"https://doi.org/10.48550/arxiv.2604.25840"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.25840","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.25840","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093130665","display_name":"Nguyen Khoi Hoang","orcid":"https://orcid.org/0009-0007-0753-7555"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hoang, Nguyen Khoi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134869796","display_name":"Shuhaib Mehri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mehri, Shuhaib","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024714223","display_name":"Tse-An Hsu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hsu, Tse-An","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134868419","display_name":"Yi-Jyun Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yi-Jyun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016538508","display_name":"Quynh Xuan Nguyen Truong","orcid":"https://orcid.org/0000-0001-6929-4404"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Truong, Quynh Xuan Nguyen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134824245","display_name":"Khoa D Doan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Doan, Khoa D","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134843929","display_name":"Dilek Hakkani-T\u00fcr","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hakkani-T\u00fcr, Dilek","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11519","display_name":"Digital Mental Health Interventions","score":0.723800003528595,"subfield":{"id":"https://openalex.org/subfields/3202","display_name":"Applied Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11519","display_name":"Digital Mental Health Interventions","score":0.723800003528595,"subfield":{"id":"https://openalex.org/subfields/3202","display_name":"Applied Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.06199999898672104,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.06120000034570694,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7050999999046326},{"id":"https://openalex.org/keywords/depression","display_name":"Depression (economics)","score":0.5213000178337097},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.49889999628067017},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.46140000224113464},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3962000012397766},{"id":"https://openalex.org/keywords/mental-health","display_name":"Mental health","score":0.38109999895095825},{"id":"https://openalex.org/keywords/patient-safety","display_name":"Patient safety","score":0.3628999888896942}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7050999999046326},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5457000136375427},{"id":"https://openalex.org/C2776867660","wikidata":"https://www.wikidata.org/wiki/Q1814941","display_name":"Depression (economics)","level":2,"score":0.5213000178337097},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.49889999628067017},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.46140000224113464},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4293000102043152},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3962000012397766},{"id":"https://openalex.org/C134362201","wikidata":"https://www.wikidata.org/wiki/Q317309","display_name":"Mental health","level":2,"score":0.38109999895095825},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3698999881744385},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3677000105381012},{"id":"https://openalex.org/C2779328685","wikidata":"https://www.wikidata.org/wiki/Q1475557","display_name":"Patient safety","level":3,"score":0.3628999888896942},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.34450000524520874},{"id":"https://openalex.org/C75630572","wikidata":"https://www.wikidata.org/wiki/Q538904","display_name":"Applied psychology","level":1,"score":0.33390000462532043},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3084000051021576},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.30559998750686646},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3050999939441681},{"id":"https://openalex.org/C3018822202","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Patient data","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C80059220","wikidata":"https://www.wikidata.org/wiki/Q7521302","display_name":"Simulated patient","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C19527891","wikidata":"https://www.wikidata.org/wiki/Q1120908","display_name":"Medical physics","level":1,"score":0.27250000834465027},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C3019858935","wikidata":"https://www.wikidata.org/wiki/Q4340209","display_name":"Depressive symptoms","level":3,"score":0.26019999384880066},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2547999918460846},{"id":"https://openalex.org/C70410870","wikidata":"https://www.wikidata.org/wiki/Q199906","display_name":"Clinical psychology","level":1,"score":0.25369998812675476},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.25840","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.25840","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Patient":[0],"simulators":[1,39,101,162],"are":[2],"gaining":[3],"traction":[4],"in":[5],"mental":[6],"health":[7],"training":[8],"by":[9],"providing":[10],"scalable":[11],"exposure":[12],"to":[13,169],"complex":[14],"and":[15,28,34,43,58,85,98,115,163,174],"sensitive":[16],"patient":[17,30,45,79,161],"interactions.":[18],"Simulating":[19],"depressed":[20],"patients":[21],"is":[22,147],"particularly":[23],"challenging,":[24],"as":[25],"safety":[26],"constraints":[27],"high":[29],"variability":[31],"complicate":[32],"simulations":[33],"underscore":[35],"the":[36,125,135],"need":[37],"for":[38],"that":[40,71,100,124,144],"capture":[41],"diverse":[42,106],"realistic":[44],"behaviors.":[46],"However,":[47],"existing":[48],"evaluations":[49],"heavily":[50],"rely":[51],"on":[52,132],"LLM-judges":[53],"with":[54,150],"poorly":[55],"specified":[56],"prompts":[57],"do":[59],"not":[60],"assess":[61],"behavioral":[62],"diversity.":[63],"We":[64,121],"introduce":[65],"PSI-Bench,":[66,89],"an":[67,165],"automatic":[68],"evaluation":[69],"framework":[70,127],"provides":[72,164],"interpretable,":[73,166],"clinically":[74],"grounded":[75],"diagnostics":[76],"of":[77,158],"depression":[78,160],"simulator":[80,96,172],"behavior":[81],"across":[82,94],"turn-,":[83],"dialogue-,":[84],"population-level":[86],"dimensions.":[87],"Using":[88],"we":[90],"benchmark":[91,146,168],"seven":[92],"LLMs":[93],"two":[95],"frameworks":[97],"find":[99],"produce":[102],"overly":[103],"long,":[104],"lexically":[105],"responses,":[107],"show":[108,123],"reduced":[109],"variability,":[110],"resolve":[111],"emotions":[112],"too":[113],"quickly,":[114],"follow":[116],"a":[117,129,140],"uniform":[118],"negative-to-positive":[119],"trajectory.":[120],"also":[122],"simulation":[126],"has":[128],"larger":[130],"impact":[131],"fidelity":[133],"than":[134],"model":[136],"scale.":[137],"Results":[138],"from":[139],"human":[141],"study":[142],"demonstrate":[143],"our":[145],"strongly":[148],"aligned":[149],"expert":[151],"judgments.":[152],"Our":[153],"work":[154],"reveals":[155],"key":[156],"limitations":[157],"current":[159],"extensible":[167],"guide":[170],"future":[171],"design":[173],"evaluation.":[175]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-04-30T00:00:00"}
