{"id":"https://openalex.org/W7154225043","doi":"https://doi.org/10.48550/arxiv.2604.10437","title":"Enhancing Fine-Grained Spatial Grounding in 3D CT Report Generation via Discriminative Guidance","display_name":"Enhancing Fine-Grained Spatial Grounding in 3D CT Report Generation via Discriminative Guidance","publication_year":2026,"publication_date":"2026-04-12","ids":{"openalex":"https://openalex.org/W7154225043","doi":"https://doi.org/10.48550/arxiv.2604.10437"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10437","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10437","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10437","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133619485","display_name":"Chenyu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Chenyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133605137","display_name":"Weicheng Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Weicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133614458","display_name":"Han Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133596404","display_name":"Wenchao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wenchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133568695","display_name":"Kayhan Batmanghelich","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Batmanghelich, Kayhan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5133619485"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.4311999976634979,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.4311999976634979,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.17170000076293945,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.11739999800920486,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8039000034332275},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6972000002861023},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5073999762535095},{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.45159998536109924},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3874000012874603},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.358599990606308}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8039000034332275},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6972000002861023},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6801000237464905},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6118000149726868},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5073999762535095},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.45159998536109924},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4101000130176544},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3874000012874603},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.358599990606308},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.3465000092983246},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3246999979019165},{"id":"https://openalex.org/C544519230","wikidata":"https://www.wikidata.org/wiki/Q32566","display_name":"Computed tomography","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C2986522900","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relationship","level":2,"score":0.25360000133514404},{"id":"https://openalex.org/C534262118","wikidata":"https://www.wikidata.org/wiki/Q177719","display_name":"Medical diagnosis","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10437","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10437","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10437","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10437","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7567389607429504,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision--language":[0],"models":[1,167],"(VLMs)":[2],"for":[3,54,76,166],"radiology":[4,23],"report":[5,50,102],"generation":[6,103],"(RRG)":[7],"can":[8],"produce":[9],"long-form":[10],"chest":[11],"CT":[12,44],"reports":[13,96],"from":[14,94,120,133],"volumetric":[15],"scans":[16],"and":[17,26,60,73,97,126],"show":[18],"strong":[19],"potential":[20],"to":[21,100,122,136,153],"improve":[22],"workflow":[24],"efficiency":[25],"consistency.":[27],"However,":[28],"existing":[29],"methods":[30],"face":[31],"two":[32],"key":[33],"limitations:":[34],"(i)":[35],"training":[36],"supervision":[37],"is":[38,63],"often":[39],"coarse,":[40],"aligning":[41],"a":[42,47,87,143],"whole":[43],"volume":[45],"with":[46,83],"full":[48],"free-text":[49,95],"without":[51],"explicit":[52],"alignment":[53],"fine-grained":[55,92,160],"attributes":[56],"or":[57,70],"pathology":[58],"locations;":[59],"(ii)":[61],"evaluation":[62],"typically":[64],"holistic":[65],"(lexical":[66],"overlap,":[67],"entity":[68],"matching,":[69],"LLM-as-a-judge":[71],"scores)":[72],"not":[74],"diagnostic":[75],"spatial":[77,161],"grounding.":[78],"We":[79],"propose":[80],"\\emph{Discriminative":[81],"Cue-Prompting":[82],"Prompt":[84],"Dropout":[85],"(DCP-PD)},":[86],"plug-and-play":[88],"framework":[89],"that":[90,159,168],"distills":[91],"cues":[93],"uses":[98],"them":[99],"guide":[101],"while":[104],"mitigating":[105],"shortcut":[106],"reliance":[107],"via":[108],"prompt":[109],"dropout.":[110],"DCP-PD":[111],"achieves":[112],"state-of-the-art":[113],"performance":[114,130],"on":[115,131,171],"CT-RATE,":[116],"improving":[117],"macro":[118],"F1":[119,134],"$=0.501$":[121],"$0.603$":[123],"(20%":[124],"relative),":[125],"substantially":[127],"boosts":[128],"out-of-distribution":[129],"Rad-ChestCT":[132],"$=0.266$":[135],"$0.503$":[137],"(89%":[138],"relative).":[139],"Finally,":[140],"we":[141],"introduce":[142],"hierarchical,":[144],"location-aware":[145],"question-set":[146],"protocol":[147],"(presence":[148],"$\\rightarrow$":[149,151],"laterality":[150],"lobe)":[152],"directly":[154],"assess":[155],"pathology-location":[156],"grounding,":[157],"showing":[158],"localization":[162],"remains":[163],"challenging":[164],"even":[165],"score":[169],"highly":[170],"current":[172],"benchmarks.":[173]},"counts_by_year":[],"updated_date":"2026-04-15T06:04:33.058270","created_date":"2026-04-15T00:00:00"}
