{"id":"https://openalex.org/W7162394307","doi":"https://doi.org/10.48550/arxiv.2605.24902","title":"When Reasoning Hurts: Source-Aware Evaluation of Frontier LLMs for Clinical SOAP Note Generation","display_name":"When Reasoning Hurts: Source-Aware Evaluation of Frontier LLMs for Clinical SOAP Note Generation","publication_year":2026,"publication_date":"2026-05-24","ids":{"openalex":"https://openalex.org/W7162394307","doi":"https://doi.org/10.48550/arxiv.2605.24902"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.24902","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.24902","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.24902","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137032785","display_name":"Faizan Faisal","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Faisal, Faizan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5137032785"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6304000020027161,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6304000020027161,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.09049999713897705,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10350","display_name":"Electronic Health Records Systems","score":0.06729999929666519,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/soap","display_name":"SOAP","score":0.7513999938964844},{"id":"https://openalex.org/keywords/frontier","display_name":"Frontier","score":0.5662999749183655},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5475000143051147},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.4544999897480011},{"id":"https://openalex.org/keywords/efficient-frontier","display_name":"Efficient frontier","score":0.3407999873161316},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.33970001339912415}],"concepts":[{"id":"https://openalex.org/C17881449","wikidata":"https://www.wikidata.org/wiki/Q189620","display_name":"SOAP","level":2,"score":0.7513999938964844},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.5662999749183655},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5475000143051147},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.4544999897480011},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45010000467300415},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36800000071525574},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.3515999913215637},{"id":"https://openalex.org/C51485801","wikidata":"https://www.wikidata.org/wiki/Q16966861","display_name":"Efficient frontier","level":3,"score":0.3407999873161316},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.33970001339912415},{"id":"https://openalex.org/C2776325391","wikidata":"https://www.wikidata.org/wiki/Q6917865","display_name":"Motivated reasoning","level":3,"score":0.335099995136261},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.30640000104904175},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.305400013923645},{"id":"https://openalex.org/C34778210","wikidata":"https://www.wikidata.org/wiki/Q376791","display_name":"Production\u2013possibility frontier","level":3,"score":0.30399999022483826},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3018999993801117},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.30149999260902405},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.27160000801086426},{"id":"https://openalex.org/C106347477","wikidata":"https://www.wikidata.org/wiki/Q5384228","display_name":"Equating","level":3,"score":0.27129998803138733},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.2678999900817871},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.2644999921321869},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.25999999046325684},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.2563000023365021},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.24902","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.24902","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.24902","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.24902","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reasoning-enabled":[0],"LLMs":[1],"perform":[2],"strongly":[3],"on":[4],"medical":[5],"reasoning":[6,56,96,118],"benchmarks,":[7],"but":[8],"it":[9],"remains":[10],"unclear":[11],"whether":[12],"these":[13],"gains":[14],"transfer":[15],"to":[16,124],"structured":[17],"clinical":[18,29],"documentation;":[19],"we":[20],"investigate":[21],"this":[22],"question":[23],"using":[24,65],"SOAP":[25,127],"note":[26,128],"generation":[27,60,129],"from":[28],"dialogue":[30],"in":[31,47],"a":[32,48,79],"source-aware":[33],"benchmark":[34],"spanning":[35],"OMI":[36],"Health,":[37],"ACI-Bench,":[38],"and":[39,45,57],"PriMock57.":[40],"We":[41],"evaluate":[42],"GPT-5.4,":[43],"DeepSeek-V4-Flash,":[44],"Gemma-4-E4B":[46],"controlled":[49],"2x2":[50],"design":[51],"that":[52,78,116],"independently":[53],"toggles":[54],"provider-native":[55],"same-source":[58,106],"retrieval-augmented":[59],"(RAG).":[61],"Outputs":[62],"are":[63],"assessed":[64],"seven":[66],"automatic":[67],"metrics":[68],"alongside":[69],"two":[70],"reference-aware":[71],"LLM":[72],"judges.":[73],"Both":[74],"evaluation":[75],"approaches":[76],"agree":[77],"non-reasoning":[80],"GPT-5.4":[81,99],"configuration":[82],"achieves":[83],"the":[84,113],"highest":[85],"overall":[86],"quality,":[87],"while":[88],"DeepSeek-V4-Flash":[89],"performs":[90],"best":[91],"among":[92],"reasoning-enabled":[93],"configurations.":[94],"Enabling":[95],"significantly":[97],"degrades":[98],"performance":[100],"across":[101],"all":[102],"three":[103],"datasets,":[104],"whereas":[105],"RAG":[107],"yields":[108],"smaller,":[109],"model-dependent":[110],"improvements.":[111],"Overall,":[112],"findings":[114],"indicate":[115],"stronger":[117],"capability":[119],"should":[120],"not":[121],"be":[122],"assumed":[123],"improve":[125],"fidelity-sensitive":[126],"without":[130],"dedicated,":[131],"task-specific":[132],"evaluation.":[133]},"counts_by_year":[],"updated_date":"2026-05-27T06:22:25.060010","created_date":"2026-05-27T00:00:00"}
