{"id":"https://openalex.org/W7134265113","doi":"https://doi.org/10.48550/arxiv.2603.06183","title":"CRIMSON: A Clinically-Grounded LLM-Based Metric for Generative Radiology Report Evaluation","display_name":"CRIMSON: A Clinically-Grounded LLM-Based Metric for Generative Radiology Report Evaluation","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7134265113","doi":"https://doi.org/10.48550/arxiv.2603.06183"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.06183","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092998246","display_name":"Mohammed Baharoon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baharoon, Mohammed","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120050330","display_name":"Thibault Heintz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heintz, Thibault","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008582770","display_name":"Siavash Raissi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raissi, Siavash","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084234990","display_name":"Mahmoud Alabbad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alabbad, Mahmoud","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128399116","display_name":"Mona Alhammad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alhammad, Mona","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043758008","display_name":"Hassan Alomaish","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"AlOmaish, Hassan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128435592","display_name":"Sung Hyun Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Sung Eun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057010395","display_name":"Oishi Banerjee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Banerjee, Oishi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128502355","display_name":"Pranav Rajpurkar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rajpurkar, Pranav","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11894","display_name":"Radiology practices and education","score":0.6341999769210815,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11894","display_name":"Radiology practices and education","score":0.6341999769210815,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.1307000070810318,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.03420000150799751,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5475000143051147},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.5300999879837036},{"id":"https://openalex.org/keywords/snomed-ct","display_name":"SNOMED CT","score":0.5102999806404114},{"id":"https://openalex.org/keywords/guideline","display_name":"Guideline","score":0.47839999198913574},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4449000060558319},{"id":"https://openalex.org/keywords/clinical-practice","display_name":"Clinical Practice","score":0.41600000858306885},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.40619999170303345},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.40290001034736633}],"concepts":[{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5475000143051147},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.5304999947547913},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.5300999879837036},{"id":"https://openalex.org/C19527891","wikidata":"https://www.wikidata.org/wiki/Q1120908","display_name":"Medical physics","level":1,"score":0.5257999897003174},{"id":"https://openalex.org/C206497026","wikidata":"https://www.wikidata.org/wiki/Q1753883","display_name":"SNOMED CT","level":3,"score":0.5102999806404114},{"id":"https://openalex.org/C2780182762","wikidata":"https://www.wikidata.org/wiki/Q1630279","display_name":"Guideline","level":2,"score":0.47839999198913574},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44999998807907104},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4449000060558319},{"id":"https://openalex.org/C2779974597","wikidata":"https://www.wikidata.org/wiki/Q28448986","display_name":"Clinical Practice","level":2,"score":0.41600000858306885},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.40619999170303345},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.40290001034736633},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3781999945640564},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36890000104904175},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.3675000071525574},{"id":"https://openalex.org/C2989236134","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Patient care","level":2,"score":0.36719998717308044},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.35929998755455017},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3465000092983246},{"id":"https://openalex.org/C71405471","wikidata":"https://www.wikidata.org/wiki/Q757012","display_name":"Quality management","level":3,"score":0.3303999900817871},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.30070000886917114},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.2712000012397766},{"id":"https://openalex.org/C2989179672","wikidata":"https://www.wikidata.org/wiki/Q6806500","display_name":"Clinical decision making","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.2597000002861023},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25380000472068787},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.06183","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.06183","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.06183","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.06183","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5652834177017212}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,202],"introduce":[1],"CRIMSON,":[2],"a":[3,61,84,96,152,169,213],"clinically":[4,45,110,123,156],"grounded":[5],"evaluation":[6,207,220],"framework":[7,57],"for":[8],"chest":[9],"X-ray":[10],"report":[11,222],"generation":[12],"that":[13,108,147],"assesses":[14],"reports":[15],"based":[16,94],"on":[17,52,95],"diagnostic":[18,78],"correctness,":[19],"contextual":[20],"relevance,":[21],"and":[22,37,41,69,77,142,185,210,212],"patient":[23,34],"safety.":[24],"Unlike":[25],"prior":[26],"metrics,":[27],"CRIMSON":[28,116,160,194],"incorporates":[29],"full":[30],"clinical":[31,85],"context,":[32],"including":[33],"age,":[35],"indication,":[36],"guideline-based":[38],"decision":[39],"rules,":[40],"prevents":[42],"normal":[43],"or":[44,92],"insignificant":[46],"findings":[47],"from":[48,190],"exerting":[49],"disproportionate":[50],"influence":[51],"the":[53,196,204,206],"overall":[54,187],"score.":[55],"The":[56],"categorizes":[58],"errors":[59,72],"into":[60],"comprehensive":[62],"taxonomy":[63],"covering":[64],"false":[65],"findings,":[66,68],"missing":[67],"eight":[70],"attribute-level":[71],"(e.g.,":[73],"location,":[74],"severity,":[75],"measurement,":[76],"overinterpretation).":[79],"Each":[80],"finding":[81],"is":[82,117],"assigned":[83],"significance":[86],"level":[87],"(urgent,":[88],"actionable":[89],"non-urgent,":[90],"non-actionable,":[91],"expected/benign),":[93],"guideline":[97],"developed":[98],"in":[99,132],"collaboration":[100],"with":[101,122,164,179,199],"attending":[102],"cardiothoracic":[103,192],"radiologists,":[104,193],"enabling":[105],"severity-aware":[106],"weighting":[107],"prioritizes":[109],"consequential":[111],"mistakes":[112],"over":[113,175],"benign":[114],"discrepancies.":[115],"validated":[118],"through":[119,143],"strong":[120],"alignment":[121,198],"significant":[124],"error":[125,181],"counts":[126],"annotated":[127],"by":[128],"six":[129],"board-certified":[130],"radiologists":[131],"ReXVal":[133],"(Kendalls":[134],"tau":[135],"=":[136,140],"0.61-0.71;":[137],"Pearsons":[138],"r":[139],"0.71-0.84),":[141],"two":[144],"additional":[145],"benchmarks":[146],"we":[148],"introduce.":[149],"In":[150,167],"RadJudge,":[151],"targeted":[153],"suite":[154],"of":[155,174,221],"challenging":[157],"pass-fail":[158],"scenarios,":[159],"shows":[161],"consistent":[162],"agreement":[163],"expert":[165],"judgment.":[166],"RadPref,":[168,211],"larger":[170],"radiologist":[171,200],"preference":[172],"benchmark":[173],"100":[176],"pairwise":[177],"cases":[178],"structured":[180],"categorization,":[182],"severity":[183],"modeling,":[184],"1-5":[186],"quality":[188],"ratings":[189],"three":[191],"achieves":[195],"strongest":[197],"preferences.":[201],"release":[203],"metric,":[205],"benchmarks,":[208],"RadJudge":[209],"fine-tuned":[214],"MedGemma":[215],"model":[216],"to":[217],"enable":[218],"reproducible":[219],"generation,":[223],"all":[224],"available":[225],"at":[226],"https://github.com/rajpurkarlab/CRIMSON.":[227]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-10T00:00:00"}
