{"id":"https://openalex.org/W4300980522","doi":"https://doi.org/10.48550/arxiv.2209.15517","title":"Medical Image Understanding with Pretrained Vision Language Models: A Comprehensive Study","display_name":"Medical Image Understanding with Pretrained Vision Language Models: A Comprehensive Study","publication_year":2022,"publication_date":"2022-09-30","ids":{"openalex":"https://openalex.org/W4300980522","doi":"https://doi.org/10.48550/arxiv.2209.15517"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2209.15517","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.15517","pdf_url":"https://arxiv.org/pdf/2209.15517","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2209.15517","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060788747","display_name":"Ziyuan Qin","orcid":"https://orcid.org/0000-0003-4882-4570"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Qin, Ziyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060429045","display_name":"Huahui Yi","orcid":"https://orcid.org/0009-0007-9361-5491"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi, Huahui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013010213","display_name":"Qicheng Lao","orcid":"https://orcid.org/0000-0002-6032-8548"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lao, Qicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100456986","display_name":"Kang Li","orcid":"https://orcid.org/0000-0002-8136-9816"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Kang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5060788747"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9422000050544739,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8148188591003418},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.6818649768829346},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6447082161903381},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5706577897071838},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5609667897224426},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5458276271820068},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5056576728820801},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.48564258217811584},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4640008807182312},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.44575241208076477},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.41933420300483704},{"id":"https://openalex.org/keywords/transferability","display_name":"Transferability","score":0.41177666187286377}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8148188591003418},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.6818649768829346},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6447082161903381},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5706577897071838},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5609667897224426},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5458276271820068},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5056576728820801},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.48564258217811584},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4640008807182312},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.44575241208076477},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41933420300483704},{"id":"https://openalex.org/C61272859","wikidata":"https://www.wikidata.org/wiki/Q7834031","display_name":"Transferability","level":3,"score":0.41177666187286377},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C140331021","wikidata":"https://www.wikidata.org/wiki/Q1868104","display_name":"Logit","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2209.15517","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.15517","pdf_url":"https://arxiv.org/pdf/2209.15517","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2209.15517","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2209.15517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2209.15517","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.15517","pdf_url":"https://arxiv.org/pdf/2209.15517","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2161221533","https://openalex.org/W4229699405","https://openalex.org/W1666484574","https://openalex.org/W2216382288","https://openalex.org/W2355491300","https://openalex.org/W4234629551","https://openalex.org/W2011110943","https://openalex.org/W2028856635","https://openalex.org/W2011433332","https://openalex.org/W2582594227"],"abstract_inverted_index":{"The":[0],"large-scale":[1],"pre-trained":[2,39,59],"vision":[3],"language":[4],"models":[5,163,167],"(VLM)":[6],"have":[7],"shown":[8],"remarkable":[9],"domain":[10],"transfer":[11],"capability":[12,22],"on":[13,137],"natural":[14],"images.":[15],"However,":[16],"it":[17],"remains":[18],"unknown":[19],"whether":[20],"this":[21],"can":[23,76,119],"also":[24],"apply":[25],"to":[26,41,55,90,101,156],"the":[27,35,42,53,74,78,103,128,152,157,165],"medical":[28,43,50,116,122,140],"image":[29,98],"domain.":[30],"This":[31,86],"paper":[32],"thoroughly":[33],"studies":[34],"knowledge":[36,57,79,123],"transferability":[37],"of":[38,115],"VLMs":[40,89],"domain,":[44],"where":[45],"we":[46,108],"show":[47],"that":[48,63,69,146],"well-designed":[49,148],"prompts":[51,129,149],"are":[52,70],"key":[54],"elicit":[56],"from":[58],"VLMs.":[60],"We":[61,133],"demonstrate":[62],"by":[64,168],"prompting":[65],"with":[66,94],"expressive":[67],"attributes":[68],"shared":[71],"between":[72],"domains,":[73],"VLM":[75],"carry":[77],"across":[80,142],"domains":[81],"and":[82,124,160],"improve":[83,151],"its":[84],"generalization.":[85],"mechanism":[87],"empowers":[88],"recognize":[91],"novel":[92],"objects":[93],"fewer":[95],"or":[96],"without":[97],"samples.":[99],"Furthermore,":[100],"avoid":[102],"laborious":[104],"manual":[105],"designing":[106],"process,":[107],"develop":[109],"three":[110],"approaches":[111],"for":[112,130],"automatic":[113],"generation":[114],"prompts,":[117,159],"which":[118],"inject":[120],"expert-level":[121],"image-specific":[125],"information":[126],"into":[127],"fine-grained":[131],"grounding.":[132],"conduct":[134],"extensive":[135],"experiments":[136],"thirteen":[138],"different":[139],"datasets":[141],"various":[143],"modalities,":[144],"showing":[145],"our":[147,161],"greatly":[150],"zero-shot":[153],"performance":[154],"compared":[155],"default":[158],"fine-tuned":[162],"surpass":[164],"supervised":[166],"a":[169],"significant":[170],"margin.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":5}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2022-10-04T00:00:00"}
