{"id":"https://openalex.org/W4414856603","doi":"https://doi.org/10.1109/tmi.2025.3617289","title":"Beyond Static Knowledge: Dynamic Context-Aware Cross-Modal Contrastive Learning for Medical Visual Question Answering","display_name":"Beyond Static Knowledge: Dynamic Context-Aware Cross-Modal Contrastive Learning for Medical Visual Question Answering","publication_year":2025,"publication_date":"2025-10-06","ids":{"openalex":"https://openalex.org/W4414856603","doi":"https://doi.org/10.1109/tmi.2025.3617289","pmid":"https://pubmed.ncbi.nlm.nih.gov/41052164"},"language":"en","primary_location":{"id":"doi:10.1109/tmi.2025.3617289","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmi.2025.3617289","pdf_url":null,"source":{"id":"https://openalex.org/S58069681","display_name":"IEEE Transactions on Medical Imaging","issn_l":"0278-0062","issn":["0278-0062","1558-254X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Medical Imaging","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030499511","display_name":"Rui Yang","orcid":"https://orcid.org/0000-0002-5634-5476"},"institutions":[{"id":"https://openalex.org/I10660446","display_name":"Kunming University of Science and Technology","ror":"https://ror.org/00xyeez13","country_code":"CN","type":"education","lineage":["https://openalex.org/I10660446"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rui Yang","raw_affiliation_strings":["Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, China","faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, P. R. China"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, China","institution_ids":["https://openalex.org/I10660446"]},{"raw_affiliation_string":"faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, P. R. China","institution_ids":["https://openalex.org/I10660446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100350053","display_name":"Lijun Liu","orcid":"https://orcid.org/0000-0003-4543-0111"},"institutions":[{"id":"https://openalex.org/I10660446","display_name":"Kunming University of Science and Technology","ror":"https://ror.org/00xyeez13","country_code":"CN","type":"education","lineage":["https://openalex.org/I10660446"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lijun Liu","raw_affiliation_strings":["Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, China","faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, P. R. China"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, China","institution_ids":["https://openalex.org/I10660446"]},{"raw_affiliation_string":"faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, P. R. China","institution_ids":["https://openalex.org/I10660446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059808916","display_name":"Xupeng Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I10660446","display_name":"Kunming University of Science and Technology","ror":"https://ror.org/00xyeez13","country_code":"CN","type":"education","lineage":["https://openalex.org/I10660446"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xupeng Feng","raw_affiliation_strings":["Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, China","faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, P. R. China"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, China","institution_ids":["https://openalex.org/I10660446"]},{"raw_affiliation_string":"faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, P. R. China","institution_ids":["https://openalex.org/I10660446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053073532","display_name":"Wei Peng","orcid":"https://orcid.org/0000-0002-9572-951X"},"institutions":[{"id":"https://openalex.org/I10660446","display_name":"Kunming University of Science and Technology","ror":"https://ror.org/00xyeez13","country_code":"CN","type":"education","lineage":["https://openalex.org/I10660446"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Peng","raw_affiliation_strings":["Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, China","faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, P. R. China"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, China","institution_ids":["https://openalex.org/I10660446"]},{"raw_affiliation_string":"faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, P. R. China","institution_ids":["https://openalex.org/I10660446"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050727001","display_name":"Xiaobing Yang","orcid":"https://orcid.org/0000-0002-4601-7444"},"institutions":[{"id":"https://openalex.org/I10660446","display_name":"Kunming University of Science and Technology","ror":"https://ror.org/00xyeez13","country_code":"CN","type":"education","lineage":["https://openalex.org/I10660446"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaobing Yang","raw_affiliation_strings":["Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, China","faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, P. R. China"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, China","institution_ids":["https://openalex.org/I10660446"]},{"raw_affiliation_string":"faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming, Yunnan, P. R. China","institution_ids":["https://openalex.org/I10660446"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5030499511"],"corresponding_institution_ids":["https://openalex.org/I10660446"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.27239514,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"45","issue":"3","first_page":"1075","last_page":"1087"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9740999937057495,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9527000188827515,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5164999961853027},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5081999897956848},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4586000144481659},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.4546999931335449},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.44699999690055847},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.41339999437332153},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.39559999108314514},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3871000111103058},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.3781999945640564},{"id":"https://openalex.org/keywords/knowledge-representation-and-reasoning","display_name":"Knowledge representation and reasoning","score":0.37369999289512634}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8504999876022339},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5819000005722046},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5164999961853027},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5081999897956848},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4586000144481659},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.4546999931335449},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45190000534057617},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.44699999690055847},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.41339999437332153},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.39559999108314514},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3871000111103058},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3781999945640564},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.37369999289512634},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3734000027179718},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3547999858856201},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35030001401901245},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.34950000047683716},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.33379998803138733},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.31220000982284546},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.31200000643730164},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.29330000281333923},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.2906999886035919},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C115925183","wikidata":"https://www.wikidata.org/wiki/Q1412694","display_name":"Knowledge-based systems","level":2,"score":0.28529998660087585},{"id":"https://openalex.org/C534262118","wikidata":"https://www.wikidata.org/wiki/Q177719","display_name":"Medical diagnosis","level":2,"score":0.28360000252723694},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.27959999442100525},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.27469998598098755},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2721000015735626},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2646999955177307},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.26269999146461487}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007090","descriptor_name":"Image Interpretation, Computer-Assisted","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012660","descriptor_name":"Semantics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1109/tmi.2025.3617289","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmi.2025.3617289","pdf_url":null,"source":{"id":"https://openalex.org/S58069681","display_name":"IEEE Transactions on Medical Imaging","issn_l":"0278-0062","issn":["0278-0062","1558-254X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Medical Imaging","raw_type":"journal-article"},{"id":"pmid:41052164","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41052164","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on medical imaging","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2114063639","https://openalex.org/W2604314403","https://openalex.org/W2616247523","https://openalex.org/W2896457183","https://openalex.org/W2901466771","https://openalex.org/W2962973065","https://openalex.org/W2963176022","https://openalex.org/W2963954913","https://openalex.org/W2979525559","https://openalex.org/W3011651912","https://openalex.org/W3164670515","https://openalex.org/W3165058054","https://openalex.org/W3175211395","https://openalex.org/W3184385036","https://openalex.org/W4221154868","https://openalex.org/W4281609631","https://openalex.org/W4285606599","https://openalex.org/W4293676823","https://openalex.org/W4312159230","https://openalex.org/W4312982094","https://openalex.org/W4361799513","https://openalex.org/W4379879090","https://openalex.org/W4383899673","https://openalex.org/W4386352883","https://openalex.org/W4386566421","https://openalex.org/W4386832321","https://openalex.org/W4387449190","https://openalex.org/W4387492750","https://openalex.org/W4388336515","https://openalex.org/W4388819879","https://openalex.org/W4391293808","https://openalex.org/W4391758541","https://openalex.org/W4399923931","https://openalex.org/W4402624074","https://openalex.org/W4402683917","https://openalex.org/W4402903980","https://openalex.org/W4403707714","https://openalex.org/W4406829669","https://openalex.org/W4407269601","https://openalex.org/W4407941451","https://openalex.org/W4408507944"],"related_works":[],"abstract_inverted_index":{"Medical":[0],"Visual":[1],"Question":[2],"Answering":[3],"(Med-VQA)":[4],"aims":[5],"to":[6,13,51,62,76,81,123,130,162],"analyze":[7],"medical":[8,28,60,127,220],"images":[9,29],"and":[10,21,24,47,84,102,146,157,178,197,227],"accurately":[11],"respond":[12],"natural":[14],"language":[15],"queries,":[16],"thereby":[17],"optimizing":[18,153],"clinical":[19],"workflows":[20],"improving":[22],"diagnostic":[23],"therapeutic":[25],"outcomes.":[26],"Although":[27],"contain":[30],"rich":[31],"visual":[32,145,221],"information,":[33],"the":[34,99,121,131,142,159,187,192,205,215,230],"corresponding":[35],"textual":[36],"queries":[37],"frequently":[38],"lack":[39],"sufficient":[40],"descriptive":[41],"content.":[42],"This":[43],"imbalance":[44],"of":[45,207,217,229],"information":[46,83],"modality":[48],"differences":[49],"leads":[50],"significant":[52],"semantic":[53,118,138],"bias.":[54],"Furthermore,":[55],"existing":[56],"approaches":[57],"integrate":[58],"external":[59],"knowledge":[61,71,108,128,154],"enhance":[63],"model":[64,190,231],"performance,":[65],"they":[66],"primarily":[67],"rely":[68],"on":[69,195],"static":[70],"that":[72,116,174,186],"lacks":[73],"dynamic":[74,113],"adaptation":[75],"specific":[77,129],"input":[78],"samples,":[79],"leading":[80],"redundant":[82],"noise":[85],"interference.":[86],"To":[87,106,135],"address":[88],"these":[89],"challenges,":[90],"we":[91,167],"propose":[92],"a":[93,112,169],"Contextual":[94],"Knowledge-Aware":[95],"Dynamic":[96],"Perception":[97],"for":[98,219],"Cross-Modal":[100],"Reasoning":[101],"Alignment":[103],"(CKRA)":[104],"Model.":[105],"mitigate":[107],"redundancy,":[109],"CKRA":[110,140,189,218],"employs":[111],"perception":[114],"mechanism":[115],"leverages":[117],"cues":[119],"from":[120],"query":[122],"selectively":[124],"filter":[125],"relevant":[126],"current":[132],"sample's":[133],"context.":[134],"alleviate":[136],"cross-modal":[137,176],"bias,":[139],"bridges":[141],"distance":[143],"between":[144],"linguistic":[147],"features":[148],"through":[149],"knowledge-image":[150],"contrastive":[151],"learning,":[152],"feature":[155],"representation":[156],"directing":[158],"model's":[160],"attention":[161,172],"key":[163],"image":[164],"regions.":[165],"Further,":[166],"design":[168],"dual-stream":[170],"guided":[171],"network":[173],"facilitates":[175],"interaction":[177],"alignment":[179],"across":[180],"multiple":[181],"dimensions.":[182],"Experimental":[183],"results":[184],"show":[185],"proposed":[188],"outperforms":[191],"state-of-the-art":[193],"method":[194],"SLAKE":[196],"VQA-RAD":[198],"datasets.":[199],"In":[200],"addition,":[201],"ablation":[202],"studies":[203],"validate":[204],"effectiveness":[206],"each":[208],"module,":[209],"while":[210],"Grad-CAM":[211],"maps":[212],"further":[213],"demonstrate":[214],"feasibility":[216],"questioning":[222],"tasks.":[223],"The":[224],"source":[225],"code":[226],"weights":[228],"are":[232],"available":[233],"at":[234],"https://github.com/cloneiq/CKRA-MedVQA.":[235]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
