{"id":"https://openalex.org/W4415994346","doi":"https://doi.org/10.48550/arxiv.2508.21777","title":"Benchmarking GPT-5 in Radiation Oncology: Measurable Gains, but Persistent Need for Expert Oversight","display_name":"Benchmarking GPT-5 in Radiation Oncology: Measurable Gains, but Persistent Need for Expert Oversight","publication_year":2025,"publication_date":"2025-08-29","ids":{"openalex":"https://openalex.org/W4415994346","doi":"https://doi.org/10.48550/arxiv.2508.21777"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2508.21777","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.21777","pdf_url":"https://arxiv.org/pdf/2508.21777","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2508.21777","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087796640","display_name":"U\u011fur Din\u00e7","orcid":"https://orcid.org/0000-0002-3538-5567"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dinc, Ugur","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sarkar, Jibak","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sarkar, Jibak","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018016169","display_name":"Philipp Schubert","orcid":"https://orcid.org/0000-0003-1838-7250"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schubert, Philipp","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010448955","display_name":"Sabine Semrau","orcid":"https://orcid.org/0000-0001-7049-6003"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Semrau, Sabine","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078178214","display_name":"Thomas Wei\u00dfmann","orcid":"https://orcid.org/0000-0002-1443-5422"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weissmann, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060773519","display_name":"Andre Karius","orcid":"https://orcid.org/0000-0002-3842-1375"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karius, Andre","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108331624","display_name":"Johann Brand","orcid":"https://orcid.org/0009-0003-4685-1396"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brand, Johann","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5117621652","display_name":"Bernd\u2010Niklas Axer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Axer, Bernd-Niklas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103786546","display_name":"Ahmed M. Gomaa","orcid":"https://orcid.org/0009-0007-6884-2350"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gomaa, Ahmed","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004627177","display_name":"Pluvio Stephan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stephan, Pluvio","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120294780","display_name":"Ishita Sheth","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sheth, Ishita","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114543822","display_name":"Sogand Beirami","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Beirami, Sogand","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000999118","display_name":"Annette Schwarz","orcid":"https://orcid.org/0009-0001-1256-2647"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schwarz, Annette","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005230207","display_name":"Udo S. Gaipl","orcid":"https://orcid.org/0000-0001-6375-5476"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gaipl, Udo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030709807","display_name":"Benjamin Frey","orcid":"https://orcid.org/0000-0001-6743-3351"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Frey, Benjamin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067659188","display_name":"Christoph Bert","orcid":"https://orcid.org/0000-0002-8539-6600"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bert, Christoph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004434507","display_name":"Stefanie Corradini","orcid":"https://orcid.org/0000-0001-8709-7252"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Corradini, Stefanie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Fietkau, Rainer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fietkau, Rainer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020689634","display_name":"Florian Putz","orcid":"https://orcid.org/0000-0003-3966-2872"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Putz, Florian","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":19,"corresponding_author_ids":["https://openalex.org/A5087796640"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.46309998631477356,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.46309998631477356,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12422","display_name":"Radiomics and Machine Learning in Medical Imaging","score":0.3743000030517578,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.02199999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vignette","display_name":"Vignette","score":0.7678999900817871},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6715999841690063},{"id":"https://openalex.org/keywords/radiation-oncology","display_name":"Radiation oncology","score":0.6326000094413757},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.5792999863624573},{"id":"https://openalex.org/keywords/radiation-therapist","display_name":"Radiation Therapist","score":0.42820000648498535},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.41350001096725464},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4016000032424927},{"id":"https://openalex.org/keywords/radiation-treatment-planning","display_name":"Radiation treatment planning","score":0.38830000162124634}],"concepts":[{"id":"https://openalex.org/C9719361","wikidata":"https://www.wikidata.org/wiki/Q7928967","display_name":"Vignette","level":2,"score":0.7678999900817871},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6715999841690063},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.6660000085830688},{"id":"https://openalex.org/C19527891","wikidata":"https://www.wikidata.org/wiki/Q1120908","display_name":"Medical physics","level":1,"score":0.6635000109672546},{"id":"https://openalex.org/C2992520072","wikidata":"https://www.wikidata.org/wiki/Q180507","display_name":"Radiation oncology","level":3,"score":0.6326000094413757},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.5792999863624573},{"id":"https://openalex.org/C2909208188","wikidata":"https://www.wikidata.org/wiki/Q2046313","display_name":"Radiation Therapist","level":3,"score":0.42820000648498535},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.41350001096725464},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4016000032424927},{"id":"https://openalex.org/C201645570","wikidata":"https://www.wikidata.org/wiki/Q830637","display_name":"Radiation treatment planning","level":3,"score":0.38830000162124634},{"id":"https://openalex.org/C2987700449","wikidata":"https://www.wikidata.org/wiki/Q186161","display_name":"Radiation dose","level":2,"score":0.38089999556541443},{"id":"https://openalex.org/C509974204","wikidata":"https://www.wikidata.org/wiki/Q180507","display_name":"Radiation therapy","level":2,"score":0.36890000104904175},{"id":"https://openalex.org/C535046627","wikidata":"https://www.wikidata.org/wiki/Q30612","display_name":"Clinical trial","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C2776289891","wikidata":"https://www.wikidata.org/wiki/Q1931511","display_name":"Neglect","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.31200000643730164},{"id":"https://openalex.org/C2779974597","wikidata":"https://www.wikidata.org/wiki/Q28448986","display_name":"Clinical Practice","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C197947376","wikidata":"https://www.wikidata.org/wiki/Q5155608","display_name":"Comparability","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.2671000063419342},{"id":"https://openalex.org/C2779925993","wikidata":"https://www.wikidata.org/wiki/Q336938","display_name":"Radiation exposure","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C2776598537","wikidata":"https://www.wikidata.org/wiki/Q1812651","display_name":"Clinical Oncology","level":3,"score":0.2590000033378601}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2508.21777","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.21777","pdf_url":"https://arxiv.org/pdf/2508.21777","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2508.21777","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.21777","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2508.21777","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.21777","pdf_url":"https://arxiv.org/pdf/2508.21777","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Introduction:":[0],"Large":[1],"language":[2],"models":[3],"(LLM)":[4],"have":[5],"shown":[6],"great":[7],"potential":[8],"in":[9,117,168,173,202],"clinical":[10,169,182,234],"decision":[11],"support.":[12],"GPT-5":[13,70,99,185,198],"is":[14],"a":[15,50,101],"novel":[16],"LLM":[17],"system":[18],"that":[19,226],"has":[20],"been":[21],"specifically":[22],"marketed":[23],"towards":[24],"oncology":[25,194,206],"use.":[26],"Methods:":[27],"Performance":[28],"was":[29,71,89,158],"assessed":[30],"using":[31,91],"two":[32],"complementary":[33],"benchmarks:":[34],"(i)":[35],"the":[36,67,96,122,192,220],"ACR":[37],"Radiation":[38],"Oncology":[39],"In-Training":[40],"Examination":[41],"(TXIT,":[42],"2021),":[43],"comprising":[44],"300":[45],"multiple-choice":[46,195],"items,":[47],"and":[48,63,85,109,119,138],"(ii)":[49],"curated":[51],"set":[52],"of":[53,104,222],"60":[54],"authentic":[55],"radiation":[56,80,193,205],"oncologic":[57],"vignettes":[58],"representing":[59],"diverse":[60],"disease":[61],"sites":[62],"treatment":[64,126,207],"indications.":[65],"For":[66],"vignette":[68,123],"evaluation,":[69,124],"instructed":[72],"to":[73],"generate":[74],"concise":[75],"therapeutic":[76],"plans.":[77],"Four":[78],"board-certified":[79],"oncologists":[81],"rated":[82,129],"correctness,":[83],"comprehensiveness,":[84],"hallucinations.":[86],"Inter-rater":[87,156],"reliability":[88],"quantified":[90],"Fleiss'":[92],"\\k{appa}.":[93],"Results:":[94],"On":[95],"TXIT":[97],"benchmark,":[98],"achieved":[100],"mean":[102],"accuracy":[103],"92.8%,":[105],"outperforming":[106],"GPT-4":[107],"(78.8%)":[108],"GPT-3.5":[110],"(62.1%).":[111],"Domain-specific":[112],"gains":[113],"were":[114,128,145,218],"most":[115],"pronounced":[116],"Dose":[118],"Diagnosis.":[120],"In":[121],"GPT-5's":[125],"recommendations":[127,228],"highly":[130],"for":[131,153,163,213],"correctness":[132,209],"(mean":[133],"3.24/4,":[134],"95%":[135,141],"CI:":[136,142],"3.11-3.38)":[137],"comprehensiveness":[139],"(3.59/4,":[140],"3.49-3.69).":[143],"Hallucinations":[144],"rare":[146],"with":[147],"no":[148],"case":[149],"reaching":[150],"majority":[151],"consensus":[152],"their":[154],"presence.":[155],"agreement":[157],"low":[159],"(Fleiss'":[160],"\\k{appa}":[161],"0.083":[162],"correctness),":[164],"reflecting":[165],"inherent":[166],"variability":[167],"judgment.":[170],"Errors":[171],"clustered":[172],"complex":[174],"scenarios":[175],"requiring":[176],"precise":[177],"trial":[178],"knowledge":[179],"or":[180],"detailed":[181],"adaptation.":[183],"Discussion:":[184],"clearly":[186],"outperformed":[187],"prior":[188],"model":[189],"variants":[190],"on":[191],"benchmark.":[196],"Although":[197],"exhibited":[199],"favorable":[200],"performance":[201],"generating":[203],"real-world":[204],"recommendations,":[208],"ratings":[210],"indicate":[211],"room":[212],"further":[214],"improvement.":[215],"While":[216],"hallucinations":[217],"infrequent,":[219],"presence":[221],"substantive":[223],"errors":[224],"underscores":[225],"GPT-5-generated":[227],"require":[229],"rigorous":[230],"expert":[231],"oversight":[232],"before":[233],"implementation.":[235]},"counts_by_year":[],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
