{"id":"https://openalex.org/W7126101746","doi":"https://doi.org/10.1109/bibm66473.2025.11357220","title":"Bridging the Perception-Cognition Gap:Re-Engineering SAM2 with Hilbert-Mamba for Robust VLM-Based Medical Diagnosis","display_name":"Bridging the Perception-Cognition Gap:Re-Engineering SAM2 with Hilbert-Mamba for Robust VLM-Based Medical Diagnosis","publication_year":2025,"publication_date":"2025-12-15","ids":{"openalex":"https://openalex.org/W7126101746","doi":"https://doi.org/10.1109/bibm66473.2025.11357220"},"language":null,"primary_location":{"id":"doi:10.1109/bibm66473.2025.11357220","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm66473.2025.11357220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100722395","display_name":"Hao Wu","orcid":"https://orcid.org/0000-0003-1688-0404"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hao Wu","raw_affiliation_strings":["Southern University of Science and Technology,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Shenzhen,China","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065859286","display_name":"Hui Li","orcid":"https://orcid.org/0000-0001-9198-3951"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Li","raw_affiliation_strings":["School of Informatics, Xiamen University, Xiamen, China"],"affiliations":[{"raw_affiliation_string":"School of Informatics, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124196105","display_name":"Yiyun Su","orcid":null},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiyun Su","raw_affiliation_strings":["Rutgers University, New Brunswick, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, NJ, USA","institution_ids":["https://openalex.org/I102322142"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100722395"],"corresponding_institution_ids":["https://openalex.org/I3045169105"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.73578244,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4275","last_page":"4278"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8223000168800354,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8223000168800354,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.0284000001847744,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.02800000086426735,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.6381000280380249},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.6086999773979187},{"id":"https://openalex.org/keywords/medical-diagnosis","display_name":"Medical diagnosis","score":0.53329998254776},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.525600016117096},{"id":"https://openalex.org/keywords/medical-imaging","display_name":"Medical imaging","score":0.49869999289512634},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.4756999909877777},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.39660000801086426}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7174999713897705},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.6381000280380249},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.6086999773979187},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.586899995803833},{"id":"https://openalex.org/C534262118","wikidata":"https://www.wikidata.org/wiki/Q177719","display_name":"Medical diagnosis","level":2,"score":0.53329998254776},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.525600016117096},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.49869999289512634},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.4756999909877777},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3993000090122223},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.39660000801086426},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3727000057697296},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.3635999858379364},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.361299991607666},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.34540000557899475},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.28119999170303345},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2597000002861023},{"id":"https://openalex.org/C22029948","wikidata":"https://www.wikidata.org/wiki/Q45089","display_name":"Dice","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bibm66473.2025.11357220","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm66473.2025.11357220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5965959429740906,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2889615630","https://openalex.org/W2922989930","https://openalex.org/W2963012093","https://openalex.org/W3042445013","https://openalex.org/W4391109864","https://openalex.org/W4392203599"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"studies":[1],"suggest":[2],"that":[3,210],"Visual":[4],"Language":[5],"Models":[6],"(VLMs)":[7],"hold":[8],"great":[9],"potential":[10,216],"for":[11,64,127],"tasks":[12],"such":[13],"as":[14],"automated":[15],"medical":[16,24,128,224],"diagnosis.":[17],"However,":[18],"processing":[19],"complex":[20],"three-dimensional":[21],"(3D)":[22],"multimodal":[23,71],"images":[25],"poses":[26],"significant":[27],"challenges-specifically,":[28],"the":[29,36,61,69,76,87,91,104,108,116,149,175,178,182,211,219],"effective":[30],"integration":[31],"of":[32,39,90,107,118,177,191,199,223],"complementary":[33],"information":[34],"and":[35,140,156,221],"occasional":[37],"oversight":[38],"subtle":[40],"yet":[41],"critical":[42,126],"pathological":[43],"features.":[44],"To":[45],"address":[46],"these":[47],"issues,":[48],"we":[49,98],"present":[50],"a":[51,124,134,141,188,194],"novel":[52,135],"two-stage":[53],"fusion":[54],"framework":[55,59],"termed":[56],"Hilbert-VLM.":[57],"This":[58],"leverages":[60],"HilbertMed-SAM":[62],"module":[63,152],"precise":[65],"lesion":[66],"segmentation,":[67],"with":[68,193],"generated":[70],"enhanced":[72],"prompts":[73],"then":[74],"guiding":[75],"VLM":[77,167],"toward":[78],"accurate":[79],"disease":[80],"classification.":[81],"Our":[82],"key":[83],"innovation":[84],"lies":[85],"in":[86,121],"systematic":[88],"redesign":[89],"Segment":[92],"Anything":[93],"Model":[94,112],"2":[95],"(SAM2)":[96],"architecture:":[97],"incorporate":[99],"Hilbert":[100],"space-filling":[101],"curves":[102],"into":[103,161],"scanning":[105],"mechanism":[106,139],"Mamba":[109],"State":[110],"Space":[111],"(SSM)":[113],"to":[114,144,165,173,217],"maximize":[115],"preservation":[117],"spatial":[119],"locality":[120],"3D":[122],"data,":[123],"property":[125],"image":[129],"analysis.":[130,226],"We":[131],"also":[132],"introduce":[133],"Hilbert-Mamba":[136],"Cross-Attention":[137],"(HMCA)":[138],"scale-aware":[142],"decoder":[143],"capture":[145],"fine-grained":[146],"details.":[147],"Meanwhile,":[148],"prompt":[150,164],"enhancement":[151],"unifies":[153],"segmentation":[154,184],"masks":[155],"their":[157],"corresponding":[158],"textual":[159],"attributes":[160],"an":[162],"information-dense":[163],"support":[166],"inference.":[168],"Extensive":[169],"experiments":[170],"were":[171],"conducted":[172],"validate":[174],"effectiveness":[176],"Hilbert-VLM":[179],"model.":[180],"On":[181],"BraTS2021":[183],"benchmark,":[185],"it":[186],"achieves":[187],"Dice":[189],"score":[190],"82.35%,":[192],"diagnostic":[195],"classification":[196],"accuracy":[197,220],"(ACC)":[198],"<tex":[200],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[201],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\mathbf{7":[202],"8.":[203],"8":[204],"5":[205],"\\%}$</tex>.":[206],"These":[207],"results":[208],"demonstrate":[209],"proposed":[212],"model":[213],"offers":[214],"substantial":[215],"improve":[218],"reliability":[222],"VLMbased":[225]},"counts_by_year":[],"updated_date":"2026-02-23T20:09:44.859080","created_date":"2026-01-30T00:00:00"}
