{"id":"https://openalex.org/W4416251438","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228227","title":"MicarVLMoE: A Modern Gated Cross-Aligned Vision-Language Mixture of Experts Model for Medical Image Captioning and Report Generation","display_name":"MicarVLMoE: A Modern Gated Cross-Aligned Vision-Language Mixture of Experts Model for Medical Image Captioning and Report Generation","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416251438","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228227"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228227","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228227","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116043230","display_name":"Amaan Izhar","orcid":"https://orcid.org/0000-0001-6394-0794"},"institutions":[{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]}],"countries":["MY"],"is_corresponding":true,"raw_author_name":"Amaan Izhar","raw_affiliation_strings":["Universiti Malaya,Faculty of Computer Science and Information Technology,Kuala Lumpur,Malaysia"],"affiliations":[{"raw_affiliation_string":"Universiti Malaya,Faculty of Computer Science and Information Technology,Kuala Lumpur,Malaysia","institution_ids":["https://openalex.org/I33849332"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068359573","display_name":"Nurul Japar","orcid":"https://orcid.org/0000-0002-3054-1874"},"institutions":[{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Nurul Japar","raw_affiliation_strings":["Universiti Malaya,Faculty of Computer Science and Information Technology,Kuala Lumpur,Malaysia"],"affiliations":[{"raw_affiliation_string":"Universiti Malaya,Faculty of Computer Science and Information Technology,Kuala Lumpur,Malaysia","institution_ids":["https://openalex.org/I33849332"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025895244","display_name":"Norisma Idris","orcid":"https://orcid.org/0000-0002-8006-7496"},"institutions":[{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Norisma Idris","raw_affiliation_strings":["Universiti Malaya,Faculty of Computer Science and Information Technology,Kuala Lumpur,Malaysia"],"affiliations":[{"raw_affiliation_string":"Universiti Malaya,Faculty of Computer Science and Information Technology,Kuala Lumpur,Malaysia","institution_ids":["https://openalex.org/I33849332"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071116593","display_name":"Ting Dang","orcid":"https://orcid.org/0000-0003-3806-1493"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ting Dang","raw_affiliation_strings":["The University of Melbourne,School of Computing and Information Systems,Melbourne,Australia"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne,School of Computing and Information Systems,Melbourne,Australia","institution_ids":["https://openalex.org/I165779595"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5116043230"],"corresponding_institution_ids":["https://openalex.org/I33849332"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37456917,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9101999998092651,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9101999998092651,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.022700000554323196,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.014600000344216824,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.6804999709129333},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4902999997138977},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4632999897003174},{"id":"https://openalex.org/keywords/medical-imaging","display_name":"Medical imaging","score":0.4551999866962433},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.3675000071525574},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33640000224113464},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.31630000472068787},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3077999949455261}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7712000012397766},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.6804999709129333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6467000246047974},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4902999997138977},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4632999897003174},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4602999985218048},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.4551999866962433},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39410001039505005},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3675000071525574},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3077999949455261},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30079999566078186},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.27649998664855957},{"id":"https://openalex.org/C166704113","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image registration","level":3,"score":0.27549999952316284},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.2556999921798706},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228227","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228227","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1895577753","https://openalex.org/W2101105183","https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2565639579","https://openalex.org/W2575842049","https://openalex.org/W2770165365","https://openalex.org/W2897980926","https://openalex.org/W3095483091","https://openalex.org/W3104609094","https://openalex.org/W3151410070","https://openalex.org/W4285108627","https://openalex.org/W4293479414","https://openalex.org/W4293858478","https://openalex.org/W4363650407","https://openalex.org/W4366779622","https://openalex.org/W4385245566","https://openalex.org/W4386071687","https://openalex.org/W4386598301","https://openalex.org/W4390672988","https://openalex.org/W4391771706","https://openalex.org/W4395096791","https://openalex.org/W4402353729","https://openalex.org/W4403071606","https://openalex.org/W4403649782","https://openalex.org/W4404781870","https://openalex.org/W4404782825","https://openalex.org/W4406849112","https://openalex.org/W4406983803"],"related_works":[],"abstract_inverted_index":{"Medical":[0],"image":[1],"reporting":[2,111],"(MIR)":[3],"aims":[4],"to":[5,51,100],"generate":[6],"structured":[7],"clinical":[8,127],"descriptions":[9],"from":[10],"radiological":[11],"images.":[12],"Existing":[13],"methods":[14],"struggle":[15],"with":[16,46],"fine-grained":[17],"feature":[18],"extraction,":[19],"multimodal":[20],"alignment,":[21,130],"and":[22,33,86,107,118,123,131],"generalization":[23],"across":[24],"diverse":[25],"imaging":[26],"types,":[27],"often":[28],"relying":[29],"on":[30,36,114],"vanilla":[31],"transformers":[32],"focusing":[34],"primarily":[35],"chest":[37],"X-rays.":[38],"We":[39,97],"propose":[40],"MicarVLMoE,":[41],"a":[42,59,72,88],"vision-language":[43,80],"mixture-of-experts":[44,90],"model":[45,132],"gated":[47],"cross-aligned":[48],"fusion,":[49],"designed":[50],"address":[52],"these":[53],"limitations.":[54],"Our":[55],"architecture":[56],"includes:":[57],"(i)":[58],"multiscale":[60],"vision":[61],"encoder":[62],"(MSVE)":[63],"for":[64,79,93],"capturing":[65],"anatomical":[66],"details":[67],"at":[68,137],"varying":[69],"resolutions,":[70],"(ii)":[71],"multihead":[73],"dual-branch":[74],"latent":[75,83],"attention":[76],"(MDLA)":[77],"module":[78],"alignment":[81],"through":[82],"bottleneck":[84],"representations,":[85],"(iii)":[87],"modulated":[89],"(MoE)":[91],"decoder":[92],"adaptive":[94],"expert":[95],"specialization.":[96],"extend":[98],"MIR":[99],"CT":[101],"scans,":[102,106],"retinal":[103],"imaging,":[104],"MRI":[105],"gross":[108],"pathology":[109],"images,":[110],"state-of-the-art":[112],"results":[113],"COVCTR,":[115],"MMR,":[116],"PGROSS,":[117],"ROCO":[119],"datasets.":[120],"Extensive":[121],"experiments":[122],"ablations":[124],"confirm":[125],"improved":[126],"accuracy,":[128],"cross-modal":[129],"interpretability.":[133],"Code":[134],"is":[135],"available":[136],"https://github.com/AI-14/micar-vl-moe.":[138]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
