{"id":"https://openalex.org/W7147467124","doi":"https://doi.org/10.48550/arxiv.2603.29373","title":"Beyond Idealized Patients: Evaluating LLMs under Challenging Patient Behaviors in Medical Consultations","display_name":"Beyond Idealized Patients: Evaluating LLMs under Challenging Patient Behaviors in Medical Consultations","publication_year":2026,"publication_date":"2026-03-31","ids":{"openalex":"https://openalex.org/W7147467124","doi":"https://doi.org/10.48550/arxiv.2603.29373"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.29373","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29373","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.29373","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132635177","display_name":"Yahan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Yahan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132657620","display_name":"Xinyi Jie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jie, Xinyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132604323","display_name":"Wanjia Ruan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruan, Wanjia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073899717","display_name":"Xubei Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xubei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079896065","display_name":"Huaijie Zhu","orcid":"https://orcid.org/0000-0001-8263-9032"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Huaijie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103001919","display_name":"Yicheng Gao","orcid":"https://orcid.org/0009-0006-3419-8520"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Yicheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069817721","display_name":"Chaohao Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Chaohao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132615139","display_name":"Ruishan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ruishan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5132635177"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.44040000438690186,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.44040000438690186,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2615000009536743,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.1729000061750412,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intervention","display_name":"Intervention (counseling)","score":0.5415999889373779},{"id":"https://openalex.org/keywords/patient-safety","display_name":"Patient safety","score":0.453000009059906},{"id":"https://openalex.org/keywords/health-care","display_name":"Health care","score":0.4235000014305115},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.41100001335144043},{"id":"https://openalex.org/keywords/patient-care","display_name":"Patient care","score":0.3779999911785126},{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.36079999804496765},{"id":"https://openalex.org/keywords/medical-history","display_name":"Medical history","score":0.3560999929904938},{"id":"https://openalex.org/keywords/medical-care","display_name":"Medical care","score":0.33059999346733093},{"id":"https://openalex.org/keywords/outcome","display_name":"Outcome (game theory)","score":0.32330000400543213}],"concepts":[{"id":"https://openalex.org/C2780665704","wikidata":"https://www.wikidata.org/wiki/Q959298","display_name":"Intervention (counseling)","level":2,"score":0.5415999889373779},{"id":"https://openalex.org/C2779328685","wikidata":"https://www.wikidata.org/wiki/Q1475557","display_name":"Patient safety","level":3,"score":0.453000009059906},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.4235000014305115},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.41100001335144043},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3962000012397766},{"id":"https://openalex.org/C545542383","wikidata":"https://www.wikidata.org/wiki/Q2751242","display_name":"Medical emergency","level":1,"score":0.39419999718666077},{"id":"https://openalex.org/C2989236134","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Patient care","level":2,"score":0.3779999911785126},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.36079999804496765},{"id":"https://openalex.org/C206179267","wikidata":"https://www.wikidata.org/wiki/Q188952","display_name":"Medical history","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.34549999237060547},{"id":"https://openalex.org/C3018838755","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Medical care","level":2,"score":0.33059999346733093},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.32330000400543213},{"id":"https://openalex.org/C156325361","wikidata":"https://www.wikidata.org/wiki/Q1152864","display_name":"Grounded theory","level":3,"score":0.31310001015663147},{"id":"https://openalex.org/C159110408","wikidata":"https://www.wikidata.org/wiki/Q121176","display_name":"Nursing","level":1,"score":0.296099990606308},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C3019150057","wikidata":"https://www.wikidata.org/wiki/Q92779279","display_name":"Medical information","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C138816342","wikidata":"https://www.wikidata.org/wiki/Q189603","display_name":"Public health","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C134121241","wikidata":"https://www.wikidata.org/wiki/Q899301","display_name":"Yield (engineering)","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C2779231881","wikidata":"https://www.wikidata.org/wiki/Q5977147","display_name":"Medical literature","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C113336015","wikidata":"https://www.wikidata.org/wiki/Q574010","display_name":"Complete information","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C187155963","wikidata":"https://www.wikidata.org/wiki/Q629029","display_name":"Occupational safety and health","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C166735990","wikidata":"https://www.wikidata.org/wiki/Q1750812","display_name":"Human factors and ergonomics","level":3,"score":0.2637999951839447},{"id":"https://openalex.org/C3017944768","wikidata":"https://www.wikidata.org/wiki/Q1450463","display_name":"Poison control","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C80059220","wikidata":"https://www.wikidata.org/wiki/Q7521302","display_name":"Simulated patient","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C75630572","wikidata":"https://www.wikidata.org/wiki/Q538904","display_name":"Applied psychology","level":1,"score":0.25380000472068787},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.29373","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29373","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.29373","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29373","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4374081492424011,"id":"https://metadata.un.org/sdg/4"},{"display_name":"Peace, Justice and strong institutions","score":0.4003168046474457,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,29,149],"(LLMs)":[3],"are":[4,34],"increasingly":[5],"used":[6],"for":[7],"medical":[8,23,42,68,108],"consultation":[9],"and":[10,47,70,89,121,138,176,183,192],"health":[11],"information":[12,84],"support.":[13],"In":[14,55],"this":[15,56],"high-stakes":[16],"setting,":[17],"safety":[18],"depends":[19],"not":[20],"only":[21],"on":[22,27,105,141],"knowledge,":[24],"but":[25],"also":[26,171],"how":[28],"respond":[30],"when":[31],"patient":[32,49,61,146,168],"inputs":[33],"unclear,":[35],"inconsistent,":[36],"or":[37,165],"misleading.":[38],"However,":[39],"most":[40],"existing":[41,107],"LLM":[43],"evaluations":[44],"assume":[45],"idealized":[46],"well-posed":[48],"questions,":[50],"which":[51],"limits":[52],"their":[53,142],"realism.":[54],"paper,":[57],"we":[58,95,111,153],"study":[59,172],"challenging":[60,145],"behaviors":[62],"that":[63,100,178],"commonly":[64],"arise":[65],"in":[66,162],"real":[67],"consultations":[69],"complicate":[71],"safe":[72],"clinical":[73],"reasoning.":[74],"We":[75,132,170,188],"define":[76],"four":[77,106,173],"clinically":[78],"grounded":[79],"categories":[80],"of":[81,124,136],"such":[82],"behaviors:":[83],"contradiction,":[85],"factual":[86],"inaccuracy,":[87],"self-diagnosis,":[88],"care":[90],"resistance.":[91],"For":[92],"each":[93],"behavior,":[94],"specify":[96],"concrete":[97],"failure":[98,157],"criteria":[99],"capture":[101],"unsafe":[102],"responses.":[103],"Building":[104],"dialogue":[109],"datasets,":[110],"introduce":[112,185],"CPB-Bench":[113],"(Challenging":[114],"Patient":[115],"Behaviors":[116],"Benchmark),":[117],"a":[118,134],"bilingual":[119],"(English":[120],"Chinese)":[122],"benchmark":[123],"692":[125],"multi-turn":[126],"dialogues":[127],"annotated":[128],"with":[129,159],"these":[130],"behaviors.":[131],"evaluate":[133],"range":[135],"open-":[137],"closed-source":[139],"LLMs":[140],"responses":[143],"to":[144],"utterances.":[147],"While":[148],"perform":[150],"well":[151],"overall,":[152],"identify":[154],"consistent,":[155],"behavior-specific":[156],"patterns,":[158],"particular":[160],"difficulty":[161],"handling":[163],"contradictory":[164],"medically":[166],"implausible":[167],"information.":[169],"intervention":[174],"strategies":[175],"find":[177],"they":[179],"yield":[180],"inconsistent":[181],"improvements":[182],"can":[184],"unnecessary":[186],"corrections.":[187],"release":[189],"the":[190],"dataset":[191],"code.":[193]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-02T00:00:00"}
