{"id":"https://openalex.org/W7160424886","doi":"https://doi.org/10.48550/arxiv.2605.04039","title":"Safety and accuracy follow different scaling laws in clinical large language models","display_name":"Safety and accuracy follow different scaling laws in clinical large language models","publication_year":2026,"publication_date":"2026-05-05","ids":{"openalex":"https://openalex.org/W7160424886","doi":"https://doi.org/10.48550/arxiv.2605.04039"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.04039","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.04039","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.04039","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5117210978","display_name":"Sebastian Wind","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wind, Sebastian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065772285","display_name":"Tri-Thien Nguyen","orcid":"https://orcid.org/0009-0007-0974-3299"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Tri-Thien","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120560858","display_name":"Jeta Sopa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sopa, Jeta","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040088150","display_name":"Mahshad Lotfinia","orcid":"https://orcid.org/0000-0001-7605-7992"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lotfinia, Mahshad","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135442267","display_name":"Sebastian Bickelhaup","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bickelhaup, Sebastian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082305673","display_name":"Michael Uder","orcid":"https://orcid.org/0000-0001-6238-4247"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Uder, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125303218","display_name":"Harald K\u00f6stler","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"K\u00f6stler, Harald","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135512064","display_name":"Gerhard Wellein","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wellein, Gerhard","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135504214","display_name":"Sven Nebelung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nebelung, Sven","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016512818","display_name":"Daniel Truhn","orcid":"https://orcid.org/0000-0002-9605-0728"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Truhn, Daniel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135442149","display_name":"Andreas Maier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maier, Andreas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5076251937","display_name":"Soroosh Tayebi Arasteh","orcid":"https://orcid.org/0000-0003-1015-7733"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arasteh, Soroosh Tayebi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.7810999751091003,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.7810999751091003,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07699999958276749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.034299999475479126,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overconfidence-effect","display_name":"Overconfidence effect","score":0.8809999823570251},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6363999843597412},{"id":"https://openalex.org/keywords/safer","display_name":"SAFER","score":0.5449000000953674},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.43860000371932983},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4002000093460083},{"id":"https://openalex.org/keywords/contradiction","display_name":"Contradiction","score":0.3610000014305115}],"concepts":[{"id":"https://openalex.org/C51110983","wikidata":"https://www.wikidata.org/wiki/Q16503490","display_name":"Overconfidence effect","level":2,"score":0.8809999823570251},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6363999843597412},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.5449000000953674},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48980000615119934},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.43860000371932983},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4178999960422516},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4002000093460083},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.37310001254081726},{"id":"https://openalex.org/C2776728590","wikidata":"https://www.wikidata.org/wiki/Q363948","display_name":"Contradiction","level":2,"score":0.3610000014305115},{"id":"https://openalex.org/C2779328685","wikidata":"https://www.wikidata.org/wiki/Q1475557","display_name":"Patient safety","level":3,"score":0.3467000126838684},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.33970001339912415},{"id":"https://openalex.org/C162118730","wikidata":"https://www.wikidata.org/wiki/Q1128453","display_name":"Actuarial science","level":1,"score":0.336899995803833},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.329800009727478},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3095000088214874},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30820000171661377},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2978000044822693},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C3017944768","wikidata":"https://www.wikidata.org/wiki/Q1450463","display_name":"Poison control","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.2816999852657318},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2705000042915344},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2529999911785126}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.04039","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.04039","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.04039","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.04039","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5138846039772034}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Clinical":[0,221],"LLMs":[1,109],"are":[2],"often":[3],"scaled":[4],"by":[5,237],"increasing":[6,134],"model":[7,60],"size,":[8],"context":[9,66,242],"length,":[10],"retrieval":[11,64,240],"complexity,":[12],"or":[13,37],"inference-time":[14,69,201],"compute,":[15],"with":[16,87],"the":[17,131,196],"implicit":[18],"expectation":[19],"that":[20,210],"higher":[21],"accuracy":[22,136,175],"implies":[23],"safer":[24],"behavior.":[25,247],"This":[26],"assumption":[27],"is":[28,224],"incomplete":[29],"in":[30,215],"medicine,":[31],"where":[32],"a":[33,50,78,216,227,233],"few":[34],"confident,":[35],"high-risk,":[36],"evidence-contradicting":[38],"errors":[39,213],"can":[40],"matter":[41],"more":[42],"than":[43],"average":[44],"benchmark":[45,82],"performance.":[46],"We":[47,104],"introduce":[48,76],"SaFE-Scale,":[49],"framework":[51],"for":[52,96],"measuring":[53],"how":[54],"clinical":[55],"LLM":[56,222],"safety":[57,170,197,223],"changes":[58],"across":[59,110],"scale,":[61],"evidence":[62,102,129,238],"quality,":[63,239],"strategy,":[65],"exposure,":[67],"and":[68,93,101,125,154,163,179,185,199,244],"compute.":[70],"To":[71],"instantiate":[72],"this":[73,169],"framework,":[74],"we":[75],"RadSaFE-200,":[77],"Radiology":[79],"Safety-Focused":[80],"Evaluation":[81],"of":[83,219,230],"200":[84],"multiple-choice":[85],"questions":[86],"clinician-defined":[88],"clean":[89,117],"evidence,":[90,92,118,120],"conflict":[91,119],"option-level":[94],"labels":[95],"high-risk":[97,143,183],"error,":[98],"unsafe":[99],"answer,":[100],"contradiction.":[103],"evaluated":[105],"34":[106],"locally":[107],"deployed":[108],"six":[111],"deployment":[112,234],"conditions:":[113],"closed-book":[114],"prompting":[115,191],"(zero-shot),":[116],"standard":[121,177],"RAG,":[122,124],"agentic":[123,164,172],"max-context":[126],"prompting.":[127],"Clean":[128],"produced":[130,203],"strongest":[132],"improvement,":[133],"mean":[135],"from":[137,145,150,157],"73.5%":[138],"to":[139,147,152,159],"94.1%,":[140],"while":[141],"reducing":[142],"error":[144,184],"12.0%":[146],"2.6%,":[148],"contradiction":[149],"12.7%":[151],"2.3%,":[153],"dangerous":[155,186],"overconfidence":[156,187],"8.0%":[158],"1.6%.":[160],"Standard":[161],"RAG":[162,165,173,178],"did":[166],"not":[167,226],"reproduce":[168],"profile:":[171],"improved":[174],"over":[176],"reduced":[180],"contradiction,":[181],"but":[182,232],"remained":[188],"elevated.":[189],"Max-context":[190],"increased":[192],"latency":[193],"without":[194],"closing":[195],"gap,":[198],"additional":[200],"compute":[202],"only":[204],"limited":[205],"gains.":[206],"Worst-case":[207],"analysis":[208],"showed":[209],"clinically":[211],"consequential":[212],"concentrated":[214],"small":[217],"subset":[218],"questions.":[220],"therefore":[225],"passive":[228],"consequence":[229],"scaling,":[231],"property":[235],"shaped":[236],"design,":[241],"construction,":[243],"collective":[245],"failure":[246]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-07T00:00:00"}
