{"id":"https://openalex.org/W4404648570","doi":"https://doi.org/10.48550/arxiv.2411.12915","title":"VILA-M3: Enhancing Vision-Language Models with Medical Expert Knowledge","display_name":"VILA-M3: Enhancing Vision-Language Models with Medical Expert Knowledge","publication_year":2024,"publication_date":"2024-11-19","ids":{"openalex":"https://openalex.org/W4404648570","doi":"https://doi.org/10.48550/arxiv.2411.12915"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2411.12915","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.12915","pdf_url":"https://arxiv.org/pdf/2411.12915","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2411.12915","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042877366","display_name":"Vishwesh Nath","orcid":"https://orcid.org/0000-0002-6840-6205"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nath, Vishwesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100461989","display_name":"Wenqi Li","orcid":"https://orcid.org/0000-0003-1081-2830"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wenqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031003824","display_name":"Dong Yang","orcid":"https://orcid.org/0000-0002-5031-4337"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Dong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002997328","display_name":"Andriy Myronenko","orcid":"https://orcid.org/0000-0001-8713-7031"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Myronenko, Andriy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100536603","display_name":"Mingxin Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Mingxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100594691","display_name":"Yao Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Yao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065136413","display_name":"Zhijian Liu","orcid":"https://orcid.org/0009-0007-3905-9893"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zhijian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108868459","display_name":"Hongxu Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Hongxu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109672976","display_name":"Y.M. Law","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Yucheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037478865","display_name":"Yucheng Tang","orcid":"https://orcid.org/0000-0002-6008-9700"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Pengfei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051357970","display_name":"Pengfei Guo","orcid":"https://orcid.org/0009-0007-2561-4091"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Can","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070998415","display_name":"Can Zhao","orcid":"https://orcid.org/0000-0001-7286-3452"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Ziyue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039608149","display_name":"Ziyue Xu","orcid":"https://orcid.org/0000-0002-5728-6869"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Yufan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102852563","display_name":"Yufan He","orcid":"https://orcid.org/0000-0003-4095-9104"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heinrich, Greg","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030133314","display_name":"Greg Heinrich","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Law, Yee Man","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001892061","display_name":"Stephen Aylward","orcid":"https://orcid.org/0000-0002-7862-8856"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Simon, Benjamin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066969226","display_name":"Marc Edgar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Harmon, Stephanie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063626402","display_name":"Michael Zephyr","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aylward, Stephen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066945976","display_name":"Pavlo Molchanov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Edgar, Marc","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022455023","display_name":"Bar\u0131\u015f T\u00fcrkbey","orcid":"https://orcid.org/0000-0003-0853-6494"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zephyr, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043710204","display_name":"Holger R. Roth","orcid":"https://orcid.org/0000-0002-3662-8743"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Song","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052339079","display_name":"Daguang Xu","orcid":"https://orcid.org/0000-0002-4621-881X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Molchanov, Pavlo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Turkbey, Baris","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Turkbey, Baris","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Roth, Holger","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roth, Holger","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Xu, Daguang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Daguang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":25,"corresponding_author_ids":["https://openalex.org/A5042877366"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12576","display_name":"vaccines and immunoinformatics approaches","score":0.9478999972343445,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12576","display_name":"vaccines and immunoinformatics approaches","score":0.9478999972343445,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9460999965667725,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5028488039970398},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.42410218715667725},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3527444303035736},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.32750070095062256}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5028488039970398},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42410218715667725},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3527444303035736},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.32750070095062256}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2411.12915","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.12915","pdf_url":"https://arxiv.org/pdf/2411.12915","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2411.12915","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2411.12915","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2411.12915","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.12915","pdf_url":"https://arxiv.org/pdf/2411.12915","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4404648570.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Generalist":[0],"vision":[1,29,79],"language":[2],"models":[3,46,130,220],"(VLMs)":[4],"have":[5],"made":[6],"significant":[7],"strides":[8],"in":[9,16,39,70,76,176,234],"computer":[10,28],"vision,":[11],"but":[12,38],"they":[13,138],"fall":[14],"short":[15],"specialized":[17,112],"fields":[18],"like":[19,47],"healthcare,":[20,40],"where":[21],"expert":[22,126,129,193],"knowledge":[23,63,191],"is":[24,42,114],"essential.":[25],"In":[26,100],"traditional":[27],"tasks,":[30,145],"creative":[31],"or":[32],"approximate":[33],"answers":[34],"may":[35],"be":[36],"acceptable,":[37],"precision":[41],"paramount.Current":[43],"large":[44],"multimodal":[45],"Gemini":[48],"and":[49,83,97,121,150,155,217],"GPT-4o":[50],"are":[51,73,135,139,165],"insufficient":[52],"for":[53,105,132,142,169,185,239],"medical":[54,106,119,133,162,186,240],"tasks":[55],"due":[56],"to":[57,147,172],"their":[58],"reliance":[59],"on":[60,118,222],"memorized":[61],"internet":[62],"rather":[64],"than":[65],"the":[66,212,223,229],"nuanced":[67],"expertise":[68,233],"required":[69],"healthcare.":[71],"VLMs":[72,187,238],"usually":[74],"trained":[75,141,221],"three":[77],"stages:":[78],"pre-training,":[80,82],"vision-language":[81],"instruction":[84],"fine-tuning":[85],"(IFT).":[86],"IFT":[87,113],"has":[88],"been":[89],"typically":[90],"applied":[91],"using":[92],"a":[93,108,170,181],"mixture":[94],"of":[95,111,161,209,231],"generic":[96],"healthcare":[98],"data.":[99],"contrast,":[101],"we":[102,198],"propose":[103],"that":[104,164,188],"VLMs,":[107],"fourth":[109],"stage":[110],"necessary,":[115],"which":[116,157],"focuses":[117],"data":[120],"includes":[122],"information":[123],"from":[124],"domain":[125,190,232],"models.":[127,194],"Domain":[128],"developed":[131],"use":[134],"crucial":[136],"because":[137],"specifically":[140],"certain":[143],"clinical":[144],"e.g.":[146],"detect":[148],"tumors":[149],"classify":[151],"abnormalities":[152],"through":[153],"segmentation":[154],"classification,":[156],"learn":[158],"fine-grained":[159],"features":[160],"data$-$features":[163],"often":[166],"too":[167],"intricate":[168],"VLM":[171],"capture":[173],"effectively":[174],"especially":[175],"radiology.":[177],"This":[178],"paper":[179],"introduces":[180],"new":[182],"framework,":[183],"VILA-M3,":[184],"utilizes":[189],"via":[192],"Through":[195],"our":[196],"experiments,":[197],"show":[199],"an":[200,206],"improved":[201],"state-of-the-art":[202],"(SOTA)":[203],"performance":[204],"with":[205],"average":[207],"improvement":[208],"~9%":[210],"over":[211,219],"prior":[213],"SOTA":[214],"model":[215],"Med-Gemini":[216],"~6%":[218],"specific":[224],"tasks.":[225],"Our":[226],"approach":[227],"emphasizes":[228],"importance":[230],"creating":[235],"precise,":[236],"reliable":[237],"applications.":[241]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
