{"id":"https://openalex.org/W7157165160","doi":"https://doi.org/10.48550/arxiv.2604.24001","title":"CT-FineBench: A Diagnostic Fidelity Benchmark for Fine-Grained Evaluation of CT Report Generation","display_name":"CT-FineBench: A Diagnostic Fidelity Benchmark for Fine-Grained Evaluation of CT Report Generation","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7157165160","doi":"https://doi.org/10.48550/arxiv.2604.24001"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.24001","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24001","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.24001","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134757365","display_name":"Ruifeng Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Ruifeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066605043","display_name":"Wanxing Chang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Wanxing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134772515","display_name":"Weiwei Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Weiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134782227","display_name":"Bowen Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Bowen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134777505","display_name":"Zhongyu Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Zhongyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134777628","display_name":"Ling Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ling","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134794969","display_name":"Jianpeng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jianpeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2705000042915344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2705000042915344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11894","display_name":"Radiology practices and education","score":0.19529999792575836,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.11289999634027481,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8137000203132629},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.7300000190734863},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6424999833106995},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5307999849319458},{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.5285999774932861},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4790000021457672},{"id":"https://openalex.org/keywords/lexical-diversity","display_name":"Lexical diversity","score":0.44940000772476196},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.38089999556541443}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8137000203132629},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.7300000190734863},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7204999923706055},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6424999833106995},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5515999794006348},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5307999849319458},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.5285999774932861},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.504800021648407},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4790000021457672},{"id":"https://openalex.org/C2781202465","wikidata":"https://www.wikidata.org/wiki/Q18346297","display_name":"Lexical diversity","level":3,"score":0.44940000772476196},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4383000135421753},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.38089999556541443},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3504999876022339},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33329999446868896},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.30160000920295715},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2854999899864197},{"id":"https://openalex.org/C544519230","wikidata":"https://www.wikidata.org/wiki/Q32566","display_name":"Computed tomography","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C60478076","wikidata":"https://www.wikidata.org/wiki/Q3036835","display_name":"Reference data","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C2779974597","wikidata":"https://www.wikidata.org/wiki/Q28448986","display_name":"Clinical Practice","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C3020132585","wikidata":"https://www.wikidata.org/wiki/Q2671652","display_name":"Diagnostic accuracy","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.24001","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24001","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.24001","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24001","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,133],"evaluation":[1,36,134],"of":[2,20,26,31,42,80,152],"generated":[3],"reports":[4],"remains":[5],"a":[6,67,93,119,145,158],"critical":[7],"challenge":[8],"in":[9,130],"Computed":[10],"Tomography":[11],"(CT)":[12],"report":[13,147],"generation,":[14],"due":[15],"to":[16,50,74,143,169,189],"the":[17,22,29,52,76,150,153],"large":[18],"volume":[19],"text,":[21],"diversity":[23],"and":[24,28,48,72,86,102,148,161,184],"complexity":[25],"findings,":[27],"presence":[30],"fine-grained,":[32],"disease-oriented":[33],"attributes.":[34],"Conventional":[35],"metrics":[37],"offer":[38],"only":[39],"coarse":[40],"measures":[41],"lexical":[43,167],"overlap":[44,168],"or":[45],"entity":[46],"matching":[47],"fail":[49],"reflect":[51],"granular":[53],"diagnostic":[54],"accuracy":[55],"required":[56],"for":[57,125,136,157],"clinical":[58,106,127,172,182],"use.":[59],"To":[60],"address":[61],"this":[62,140],"gap,":[63],"we":[64,100,113],"propose":[65],"CT-FineBench,":[66],"benchmark":[68,89],"built":[69],"from":[70,84],"CT-RATE":[71,85],"Merlin":[73],"evaluate":[75],"fine-grained":[77,190],"factual":[78,191],"consistency":[79],"CT":[81],"reports,":[82],"constructed":[83,91],"Merlin.":[87],"Our":[88],"is":[90,185],"through":[92],"meticulous,":[94],"Question-Answering":[95],"(QA)":[96],"based":[97],"process:":[98],"first,":[99],"identify":[101],"structure":[103],"key,":[104],"finding-specific":[105],"attributes":[107,117],"(like":[108],"location,":[109],"size,":[110],"margin).":[111],"Second,":[112],"systematically":[114],"transform":[115],"these":[116],"into":[118],"QA":[120,141],"dataset,":[121],"where":[122],"questions":[123],"probe":[124],"specific":[126,171],"details":[128],"grounded":[129],"gold-standard":[131],"reports.":[132],"protocol":[135],"CT-FineBench":[137,177],"involves":[138],"using":[139],"dataset":[142],"query":[144],"machine-generated":[146],"scoring":[149],"correctness":[151],"answers.":[154],"This":[155],"allows":[156],"comprehensive,":[159],"interpretable,":[160],"clinically-relevant":[162],"assessment,":[163],"moving":[164],"beyond":[165],"superficial":[166],"pinpoint":[170],"errors.":[173],"Experiments":[174],"show":[175],"that":[176],"correlates":[178],"better":[179],"with":[180],"expert":[181],"assessment":[183],"substantially":[186],"more":[187],"sensitive":[188],"errors":[192],"than":[193],"prior":[194],"metrics.":[195]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-04-29T00:00:00"}
