{"id":"https://openalex.org/W7141426505","doi":"https://doi.org/10.48550/arxiv.2603.24846","title":"NeuroVLM-Bench: Evaluation of Vision-Enabled Large Language Models for Clinical Reasoning in Neurological Disorders","display_name":"NeuroVLM-Bench: Evaluation of Vision-Enabled Large Language Models for Clinical Reasoning in Neurological Disorders","publication_year":2026,"publication_date":"2026-03-25","ids":{"openalex":"https://openalex.org/W7141426505","doi":"https://doi.org/10.48550/arxiv.2603.24846"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.24846","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24846","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.24846","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021538397","display_name":"Katarina Trojachanec Dineva","orcid":"https://orcid.org/0000-0002-7050-4947"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dineva, Katarina Trojachanec","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048093534","display_name":"Stefan Andonov","orcid":"https://orcid.org/0000-0002-2025-9314"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andonov, Stefan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074490095","display_name":"Ilinka Ivanoska","orcid":"https://orcid.org/0000-0002-9363-1527"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ivanoska, Ilinka","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072517317","display_name":"Ivan Kitanovski","orcid":"https://orcid.org/0000-0002-0014-4229"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kitanovski, Ivan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091165556","display_name":"Sasho Gramatikov","orcid":"https://orcid.org/0000-0001-6490-5093"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gramatikov, Sasho","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130770968","display_name":"Tamara Kostova","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kostova, Tamara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129389170","display_name":"Monika Simjanoska Misheva","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Misheva, Monika Simjanoska","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5084784960","display_name":"Kostadin Mishev","orcid":"https://orcid.org/0000-0003-3982-3330"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mishev, Kostadin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.14820000529289246,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.14820000529289246,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.13740000128746033,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.11599999666213989,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/neuroimaging","display_name":"Neuroimaging","score":0.8044000267982483},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5223000049591064},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5156999826431274},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4952000081539154},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.44920000433921814},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.39239999651908875},{"id":"https://openalex.org/keywords/computational-model","display_name":"Computational model","score":0.3671000003814697},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.34869998693466187}],"concepts":[{"id":"https://openalex.org/C58693492","wikidata":"https://www.wikidata.org/wiki/Q551875","display_name":"Neuroimaging","level":2,"score":0.8044000267982483},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5764999985694885},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5223000049591064},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.51910001039505},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5156999826431274},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4952000081539154},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4657000005245209},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.44920000433921814},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42149999737739563},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.39239999651908875},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.3671000003814697},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.349700003862381},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.34869998693466187},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.328900009393692},{"id":"https://openalex.org/C2779422653","wikidata":"https://www.wikidata.org/wiki/Q2836","display_name":"Aphasia","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3107999861240387},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.2973000109195709},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2888999879360199},{"id":"https://openalex.org/C197947376","wikidata":"https://www.wikidata.org/wiki/Q5155608","display_name":"Comparability","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C2780640218","wikidata":"https://www.wikidata.org/wiki/Q8277","display_name":"Multiple sclerosis","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.24846","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24846","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.24846","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24846","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.6198160648345947}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,20,224],"multimodal":[3,105,222],"large":[4,33],"language":[5,34],"models":[6,35,156],"enable":[7],"new":[8],"possibilities":[9],"for":[10,36,99,154],"image-based":[11],"decision":[12],"support.":[13],"However,":[14],"their":[15],"reliability":[16],"and":[17,42,53,72,88,117,145,162,165,215],"operational":[18],"trade-offs":[19],"neuroimaging":[21,38],"remain":[22,148],"insufficiently":[23],"understood.":[24],"We":[25],"present":[26],"a":[27],"comprehensive":[28],"benchmarking":[29],"study":[30],"of":[31,198,221],"vision-enabled":[32],"2D":[37],"using":[39],"curated":[40],"MRI":[41],"CT":[43],"datasets":[44],"covering":[45],"multiple":[46,61,143],"sclerosis,":[47],"stroke,":[48],"brain":[49],"tumors,":[50],"other":[51],"abnormalities,":[52],"normal":[54],"controls.":[55],"Models":[56],"are":[57,119],"required":[58],"to":[59],"generate":[60],"outputs":[62],"simultaneously,":[63],"including":[64],"diagnosis,":[65],"diagnosis":[66],"subtype,":[67],"imaging":[68,112],"modality,":[69],"specialized":[70],"sequence,":[71],"anatomical":[73],"plane.":[74],"Performance":[75],"is":[76,139],"evaluated":[77],"across":[78],"four":[79],"directions:":[80],"discriminative":[81],"classification":[82,131],"with":[83],"abstention,":[84],"calibration,":[85],"structured-output":[86],"validity,":[87],"computational":[89],"efficiency.":[90],"A":[91],"multi-phase":[92],"framework":[93],"ensures":[94],"fair":[95],"comparison":[96],"while":[97,142,173,202],"controlling":[98],"selection":[100],"bias.":[101],"Across":[102],"twenty":[103],"frontier":[104],"models,":[106,201],"the":[107,134,168,176,185,195],"results":[108],"show":[109],"that":[110],"technical":[111],"attributes":[113],"such":[114],"as":[115,133,189],"modality":[116],"plane":[118],"nearly":[120],"solved,":[121],"whereas":[122],"diagnostic":[123,171],"reasoning,":[124],"especially":[125],"subtype":[126],"prediction,":[127],"remains":[128],"challenging.":[129],"Tumor":[130],"emerges":[132],"most":[135,186],"reliable":[136],"task,":[137],"stroke":[138],"moderately":[140],"solvable,":[141],"sclerosis":[144],"rare":[146],"abnormalities":[147],"difficult.":[149],"Few-shot":[150],"prompting":[151],"improves":[152],"performance":[153,197],"several":[155,199],"but":[157],"increases":[158],"token":[159],"usage,":[160],"latency,":[161],"cost.":[163],"Gemini-2.5-Pro":[164],"GPT-5-Chat":[166],"achieve":[167],"strongest":[169],"overall":[170],"performance,":[172,213],"Gemini-2.5-Flash":[174],"offers":[175],"best":[177],"efficiency-performance":[178],"trade-off.":[179],"Among":[180],"open-weight":[181],"architectures,":[182],"MedGemma-1.5-4B":[183],"demonstrates":[184],"promising":[187],"results,":[188],"under":[190],"few-shot":[191],"prompting,":[192],"it":[193],"approaches":[194],"zero-shot":[196],"proprietary":[200],"maintaining":[203],"perfect":[204],"structured":[205],"output.":[206],"These":[207],"findings":[208],"provide":[209],"practical":[210],"insights":[211],"into":[212],"reliability,":[214],"efficiency":[216],"trade-offs,":[217],"supporting":[218],"standardized":[219],"evaluation":[220],"LLMs":[223],"neuroimaging.":[225]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-28T00:00:00"}
