{"id":"https://openalex.org/W4415328512","doi":"https://doi.org/10.1109/iccv51701.2025.02266","title":"Seeing the Trees for the Forest: Rethinking Weakly-Supervised Medical Visual Grounding","display_name":"Seeing the Trees for the Forest: Rethinking Weakly-Supervised Medical Visual Grounding","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4415328512","doi":"https://doi.org/10.1109/iccv51701.2025.02266"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.02266","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02266","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2505.15123","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115757335","display_name":"Ta Duc Huy","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]},{"id":"https://openalex.org/I5681781","display_name":"The University of Adelaide","ror":"https://ror.org/00892tw58","country_code":"AU","type":"education","lineage":["https://openalex.org/I5681781"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Ta Duc Huy","raw_affiliation_strings":["Australian Institute for Machine Learning, University of Adelaide"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian Institute for Machine Learning, University of Adelaide","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I5681781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040911950","display_name":"Phuong Duy Huynh","orcid":"https://orcid.org/0000-0003-3854-3820"},"institutions":[{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]},{"id":"https://openalex.org/I5681781","display_name":"The University of Adelaide","ror":"https://ror.org/00892tw58","country_code":"AU","type":"education","lineage":["https://openalex.org/I5681781"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Duy Anh Huynh","raw_affiliation_strings":["Australian Institute for Machine Learning, University of Adelaide"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian Institute for Machine Learning, University of Adelaide","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I5681781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011835422","display_name":"Yutong Xie","orcid":"https://orcid.org/0000-0002-6644-1250"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Yutong Xie","raw_affiliation_strings":["Mohamed bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070842891","display_name":"Yuankai Qi","orcid":"https://orcid.org/0000-0003-4312-5682"},"institutions":[{"id":"https://openalex.org/I99043593","display_name":"Macquarie University","ror":"https://ror.org/01sf06y89","country_code":"AU","type":"education","lineage":["https://openalex.org/I99043593"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yuankai Qi","raw_affiliation_strings":["Macquarie University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Macquarie University","institution_ids":["https://openalex.org/I99043593"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100340143","display_name":"Qi Chen","orcid":"https://orcid.org/0000-0001-9367-4757"},"institutions":[{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]},{"id":"https://openalex.org/I5681781","display_name":"The University of Adelaide","ror":"https://ror.org/00892tw58","country_code":"AU","type":"education","lineage":["https://openalex.org/I5681781"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Qi Chen","raw_affiliation_strings":["Australian Institute for Machine Learning, University of Adelaide"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian Institute for Machine Learning, University of Adelaide","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I5681781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058485829","display_name":"Phi Le Nguyen","orcid":"https://orcid.org/0000-0001-6547-7641"},"institutions":[{"id":"https://openalex.org/I94518387","display_name":"Hanoi University of Science and Technology","ror":"https://ror.org/04nyv3z04","country_code":"VN","type":"education","lineage":["https://openalex.org/I94518387"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Phi Le Nguyen","raw_affiliation_strings":["Hanoi University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hanoi University of Science and Technology","institution_ids":["https://openalex.org/I94518387"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Sen Kim Tran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sen Kim Tran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050488362","display_name":"Son Lam Phung","orcid":"https://orcid.org/0000-0002-3076-0540"},"institutions":[{"id":"https://openalex.org/I204824540","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277","country_code":"AU","type":"education","lineage":["https://openalex.org/I204824540"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Son Lam Phung","raw_affiliation_strings":["University of Wollongong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Wollongong","institution_ids":["https://openalex.org/I204824540"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028024287","display_name":"Anton van den Hengel","orcid":"https://orcid.org/0000-0003-3027-8364"},"institutions":[{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]},{"id":"https://openalex.org/I5681781","display_name":"The University of Adelaide","ror":"https://ror.org/00892tw58","country_code":"AU","type":"education","lineage":["https://openalex.org/I5681781"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Anton van den Hengel","raw_affiliation_strings":["Australian Institute for Machine Learning, University of Adelaide"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian Institute for Machine Learning, University of Adelaide","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I5681781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103020604","display_name":"Zhibin Liao","orcid":"https://orcid.org/0000-0001-9965-4511"},"institutions":[{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]},{"id":"https://openalex.org/I5681781","display_name":"The University of Adelaide","ror":"https://ror.org/00892tw58","country_code":"AU","type":"education","lineage":["https://openalex.org/I5681781"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhibin Liao","raw_affiliation_strings":["Australian Institute for Machine Learning, University of Adelaide"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian Institute for Machine Learning, University of Adelaide","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I5681781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085105499","display_name":"Minh\u2010Son To","orcid":"https://orcid.org/0000-0002-8060-6218"},"institutions":[{"id":"https://openalex.org/I169541294","display_name":"Flinders University","ror":"https://ror.org/01kpzv902","country_code":"AU","type":"education","lineage":["https://openalex.org/I169541294"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Minh-Son To","raw_affiliation_strings":["Flinders University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Flinders University","institution_ids":["https://openalex.org/I169541294"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035480544","display_name":"Johan Verjans","orcid":"https://orcid.org/0000-0002-8336-6774"},"institutions":[{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]},{"id":"https://openalex.org/I5681781","display_name":"The University of Adelaide","ror":"https://ror.org/00892tw58","country_code":"AU","type":"education","lineage":["https://openalex.org/I5681781"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Johan W. Verjans","raw_affiliation_strings":["Australian Institute for Machine Learning, University of Adelaide"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian Institute for Machine Learning, University of Adelaide","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I5681781"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086523266","display_name":"Vu Minh Hieu Phan","orcid":"https://orcid.org/0000-0003-3861-0296"},"institutions":[{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]},{"id":"https://openalex.org/I5681781","display_name":"The University of Adelaide","ror":"https://ror.org/00892tw58","country_code":"AU","type":"education","lineage":["https://openalex.org/I5681781"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Vu Minh Hieu Phan","raw_affiliation_strings":["Australian Institute for Machine Learning, University of Adelaide"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian Institute for Machine Learning, University of Adelaide","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I5681781"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5115757335"],"corresponding_institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I5681781"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37554111,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"24445","last_page":"24455"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10912","display_name":"Telemedicine and Telehealth Implementation","score":0.7944999933242798,"subfield":{"id":"https://openalex.org/subfields/2739","display_name":"Public Health, Environmental and Occupational Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10912","display_name":"Telemedicine and Telehealth Implementation","score":0.7944999933242798,"subfield":{"id":"https://openalex.org/subfields/2739","display_name":"Public Health, Environmental and Occupational Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11999","display_name":"Empathy and Medical Education","score":0.718999981880188,"subfield":{"id":"https://openalex.org/subfields/2738","display_name":"Psychiatry and Mental health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.7038000226020813,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.9045000076293945},{"id":"https://openalex.org/keywords/transparency","display_name":"Transparency (behavior)","score":0.6424000263214111},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.47940000891685486},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.45739999413490295},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.40610000491142273},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.3774000108242035}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.9045000076293945},{"id":"https://openalex.org/C2780233690","wikidata":"https://www.wikidata.org/wiki/Q535347","display_name":"Transparency (behavior)","level":2,"score":0.6424000263214111},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5972999930381775},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5436000227928162},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.47940000891685486},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.45739999413490295},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.40610000491142273},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.35580000281333923},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.3418999910354614},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.33239999413490295},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32829999923706055},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.32019999623298645},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.31439998745918274},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.29670000076293945},{"id":"https://openalex.org/C2779134260","wikidata":"https://www.wikidata.org/wiki/Q12136","display_name":"Disease","level":2,"score":0.2897999882698059},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.28200000524520874},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.25130000710487366},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.25110000371932983}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.02266","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02266","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2505.15123","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.15123","pdf_url":"https://arxiv.org/pdf/2505.15123","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2505.15123","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2505.15123","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2505.15123","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.15123","pdf_url":"https://arxiv.org/pdf/2505.15123","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415328512.pdf","grobid_xml":"https://content.openalex.org/works/W4415328512.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Visual":[0],"grounding":[1,167],"(VG)":[2],"is":[3],"the":[4,8,91,99,118,137,145],"capability":[5],"to":[6,32,53,61,87,143,172],"identify":[7,144],"specific":[9],"regions":[10,59,95,154],"in":[11,47],"an":[12],"image":[13,147],"associated":[14],"with":[15,57],"a":[16,66,141],"particular":[17],"text":[18,119],"description.":[19],"In":[20,72],"medical":[21],"imaging,":[22],"VG":[23],"enhances":[24],"interpretability":[25],"by":[26,169],"highlighting":[27],"relevant":[28],"pathological":[29],"features":[30],"corresponding":[31],"textual":[33,55],"descriptions,":[34],"improving":[35],"model":[36],"transparency":[37],"and":[38,65,120],"trustworthiness":[39],"for":[40,103],"wider":[41],"adoption":[42],"of":[43,68,96,109,140],"deep":[44],"learning":[45,105],"models":[46,51],"clinical":[48],"practice.":[49],"Current":[50],"struggle":[52],"associate":[54],"descriptions":[56],"disease":[58,111,121],"due":[60],"inefficient":[62],"attention":[63,93],"mechanisms":[64],"lack":[67],"fine-grained":[69],"token":[70],"representations.":[71],"this":[73],"paper,":[74],"we":[75,126],"empirically":[76],"demonstrate":[77],"two":[78],"key":[79],"observations.":[80],"First,":[81],"current":[82],"VLMs":[83],"assign":[84],"high":[85],"norms":[86],"background":[88,157],"tokens,":[89],"diverting":[90],"model's":[92],"from":[94],"disease.":[97],"Second,":[98],"global":[100],"tokens":[101],"used":[102],"cross-modal":[104],"are":[106],"not":[107],"representative":[108],"local":[110],"tokens.":[112,122],"This":[113,149],"hampers":[114],"identifying":[115],"correlations":[116],"between":[117],"To":[123],"address":[124],"this,":[125],"introduce":[127],"simple,":[128],"yet":[129],"effective":[130],"Disease-Aware":[131],"Prompting":[132],"(DAP)":[133],"process,":[134],"which":[135],"uses":[136],"explainability":[138],"map":[139],"VLM":[142],"appropriate":[146],"features.":[148],"simple":[150],"strategy":[151],"amplifies":[152],"disease-relevant":[153],"while":[155],"suppressing":[156],"interference.":[158],"Without":[159],"any":[160],"additional":[161],"pixel-level":[162],"annotations,":[163],"DAP":[164],"improves":[165],"visual":[166],"accuracy":[168],"20.74%":[170],"compared":[171],"state-of-the-art":[173],"methods":[174],"across":[175],"three":[176],"major":[177],"chest":[178],"X-ray":[179],"datasets.":[180]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-19T00:00:00"}
