{"id":"https://openalex.org/W7160633805","doi":"https://doi.org/10.48550/arxiv.2605.06197","title":"Bridging visual saliency and large language models for explainable deep learning in medical imaging","display_name":"Bridging visual saliency and large language models for explainable deep learning in medical imaging","publication_year":2026,"publication_date":"2026-05-07","ids":{"openalex":"https://openalex.org/W7160633805","doi":"https://doi.org/10.48550/arxiv.2605.06197"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.06197","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06197","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.06197","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135698495","display_name":"Paul Valery Nguezet","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguezet, Paul Valery","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135722468","display_name":"Elie Tagne Fute","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fute, Elie Tagne","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068589144","display_name":"Yusuf Brima","orcid":"https://orcid.org/0000-0002-9975-8676"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brima, Yusuf","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135654246","display_name":"Benoit Martin Azanguezet","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Azanguezet, Benoit Martin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134801586","display_name":"Marcellin Atemkeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Atemkeng, Marcellin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.35089999437332153,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.35089999437332153,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.12809999287128448,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.07689999788999557,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.7184000015258789},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6473000049591064},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5139999985694885},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.5012999773025513},{"id":"https://openalex.org/keywords/readability","display_name":"Readability","score":0.4383000135421753},{"id":"https://openalex.org/keywords/neuroimaging","display_name":"Neuroimaging","score":0.40119999647140503},{"id":"https://openalex.org/keywords/medical-imaging","display_name":"Medical imaging","score":0.3626999855041504},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3571000099182129},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.34790000319480896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7282999753952026},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7189000248908997},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.7184000015258789},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6473000049591064},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5139999985694885},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.5012999773025513},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47279998660087585},{"id":"https://openalex.org/C2778143727","wikidata":"https://www.wikidata.org/wiki/Q1820650","display_name":"Readability","level":2,"score":0.4383000135421753},{"id":"https://openalex.org/C58693492","wikidata":"https://www.wikidata.org/wiki/Q551875","display_name":"Neuroimaging","level":2,"score":0.40119999647140503},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.3626999855041504},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3571000099182129},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.34790000319480896},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3352000117301941},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.334199994802475},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32519999146461487},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C191178318","wikidata":"https://www.wikidata.org/wiki/Q2256906","display_name":"Thresholding","level":3,"score":0.295199990272522},{"id":"https://openalex.org/C95423123","wikidata":"https://www.wikidata.org/wiki/Q622178","display_name":"Workgroup","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C2780233690","wikidata":"https://www.wikidata.org/wiki/Q535347","display_name":"Transparency (behavior)","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2533999979496002},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.06197","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06197","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.06197","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06197","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"score":0.6701467633247375,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,53],"opaque":[1],"nature":[2],"of":[3,165,235],"deep":[4],"learning":[5],"models":[6,46],"remains":[7],"a":[8,21,68,75,79,143,163,215],"significant":[9],"barrier":[10],"to":[11,48,99,128,155],"their":[12],"clinical":[13,233],"adoption":[14],"in":[15,195],"medical":[16],"imaging.":[17],"This":[18],"paper":[19],"presents":[20],"multimodal":[22],"explainability":[23],"framework":[24,55,219],"that":[25,72,147,222],"bridges":[26],"the":[27,118,124,137,178,185,190,203,218,230],"gap":[28],"between":[29],"convolutional":[30],"neural":[31],"network":[32],"(CNN)":[33],"predictions":[34],"and":[35,78,95,136,153,182,198,211,226,232],"clinically":[36],"actionable":[37],"insights":[38],"for":[39],"brain":[40,169,239],"tumor":[41,109,174,240],"classification,":[42],"leveraging":[43],"large":[44],"language":[45,191],"(LLMs)":[47],"deliver":[49],"human-interpretable":[50],"diagnostic":[51,159],"narratives.":[52],"proposed":[54],"operates":[56],"through":[57],"three":[58,149,173],"coupled":[59],"stages.":[60],"First,":[61],"nine":[62],"CNN":[63],"architectures":[64],"are":[65,97,104,121,140,223],"extended":[66],"with":[67],"dual-output":[69],"hybrid":[70],"formulation":[71],"simultaneously":[73],"optimises":[74],"classification":[76,180],"head":[77],"segmentation":[80,187],"head,":[81],"enabling":[82],"spatially":[83],"richer":[84],"feature":[85],"learning.":[86],"Second,":[87],"visual":[88],"saliency":[89],"attribution":[90],"methods,":[91],"namely":[92],"Grad-CAM,":[93],"Grad-CAM++,":[94],"ScoreCAM,":[96],"applied":[98],"generate":[100,156],"class-discriminative":[101],"heatmaps,":[102],"which":[103],"subsequently":[105],"refined":[106],"into":[107,132,142,214],"binary":[108],"masks":[110,120],"via":[111],"an":[112],"adaptive":[113],"percentile":[114],"thresholding":[115],"pipeline.":[116],"Third,":[117],"resulting":[119],"mapped":[122],"onto":[123],"Harvard-Oxford":[125],"cortical":[126],"atlas":[127],"translate":[129],"pixel-level":[130],"evidence":[131],"named":[133],"neuroanatomical":[134],"structures,":[135],"extracted":[138],"findings":[139],"encoded":[141],"structured":[144],"JSON":[145],"file":[146],"conditions":[148],"LLMs":[150],"(Grok3,":[151],"Mistral,":[152],"LLaMA)":[154],"coherent,":[157],"radiological-style":[158],"reports.":[160],"Evaluated":[161],"on":[162],"dataset":[164],"4,834":[166],"contrast-enhanced":[167],"T1-weighted":[168],"MRI":[170],"images":[171],"spanning":[172],"classes,":[175],"InceptionResNetV2":[176],"achieved":[177,202],"highest":[179,204],"performance":[181],"Grad-CAM++":[183],"yielded":[184],"best":[186],"overlap.":[188],"Among":[189],"models,":[192],"Grok3":[193],"led":[194],"lexical":[196],"diversity":[197],"coherence,":[199],"while":[200],"LLaMA":[201],"readability":[205],"score.":[206],"By":[207],"integrating":[208],"visual,":[209],"anatomical,":[210],"linguistic":[212],"modalities":[213],"unified":[216],"pipeline,":[217],"produces":[220],"explanations":[221],"technically":[224],"grounded":[225],"meaningfully":[227],"interpretable,":[228],"advancing":[229],"transparency":[231],"accountability":[234],"artificial":[236],"intelligence":[237],"assisted":[238],"diagnosis.":[241]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-09T00:00:00"}
