{"id":"https://openalex.org/W7134830214","doi":"https://doi.org/10.48550/arxiv.2603.07131","title":"Deep Expert Injection for Anchoring Retinal VLMs with Domain-Specific Knowledge","display_name":"Deep Expert Injection for Anchoring Retinal VLMs with Domain-Specific Knowledge","publication_year":2026,"publication_date":"2026-03-07","ids":{"openalex":"https://openalex.org/W7134830214","doi":"https://doi.org/10.48550/arxiv.2603.07131"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.07131","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.07131","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.07131","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128682346","display_name":"Shuai Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lu, Shuai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128663897","display_name":"Meng Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Meng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128664187","display_name":"Jia Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Jia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128647780","display_name":"Jiawei Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Jiawei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128678568","display_name":"Bo Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111752330","display_name":"Shengzhu Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Shengzhu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128643871","display_name":"Weihang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Weihang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128633093","display_name":"Huazhu Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Huazhu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128644992","display_name":"Huiqi Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Huiqi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5128682346"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8927000164985657,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8927000164985657,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11438","display_name":"Retinal Imaging and Analysis","score":0.053300000727176666,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.007499999832361937,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4569000005722046},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.42170000076293945},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.421099990606308},{"id":"https://openalex.org/keywords/sensory-cue","display_name":"Sensory cue","score":0.3111000061035156},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.30390000343322754},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language understanding","score":0.299699991941452}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7559999823570251},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6072999835014343},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4569000005722046},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.42170000076293945},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.421099990606308},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3587999939918518},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.3111000061035156},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.30390000343322754},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.29280000925064087},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.26899999380111694}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.07131","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.07131","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.07131","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.07131","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Vision":[1],"Language":[2],"Models":[3],"(LVLMs)":[4],"show":[5],"immense":[6],"potential":[7],"for":[8,115,123],"automated":[9],"ophthalmic":[10,216,225],"diagnosis.":[11],"However,":[12],"their":[13],"clinical":[14],"deployment":[15],"is":[16,60],"severely":[17],"hindered":[18],"by":[19,63,160],"lacking":[20],"domain-specific":[21,208],"knowledge.":[22],"In":[23],"this":[24],"work,":[25],"we":[26,79,130,149],"identify":[27],"two":[28],"structural":[29],"deficiencies":[30],"hindering":[31],"reliable":[32],"medical":[33],"reasoning:":[34],"1)":[35],"the":[36,53,181,221],"Perception":[37],"Gap,":[38,55],"where":[39,56],"general-purpose":[40],"visual":[41,58,109,163,177,189,217],"encoders":[42],"fail":[43],"to":[44,72,86,155,184],"resolve":[45],"fine-grained":[46],"pathological":[47,124],"cues":[48],"(e.g.,":[49],"microaneurysms);":[50],"and":[51,118,211],"2)":[52],"Reasoning":[54],"sparse":[57],"evidence":[59],"progressively":[61],"overridden":[62],"massive":[64,202],"language":[65],"priors":[66],"in":[67,188,215],"deeper":[68],"transformer":[69],"layers,":[70],"leading":[71],"ungrounded":[73],"hallucinations.":[74],"To":[75,126],"bridge":[76],"these":[77],"gaps,":[78],"propose":[80],"EyExIn,":[81],"a":[82,94,112,119,132,176],"data-efficient":[83],"framework":[84],"designed":[85],"anchor":[87],"retinal":[88],"VLMs":[89],"with":[90],"expert":[91,121],"knowledge":[92,209],"via":[93],"Deep":[95,152],"Expert":[96,153],"Injection":[97,154],"mechanism.":[98],"Our":[99],"architecture":[100],"employs":[101],"an":[102],"Expert-Aware":[103],"Dual-Stream":[104],"encoding":[105],"strategy":[106],"that":[107,179,197],"decouples":[108],"representation":[110],"into":[111,169],"general":[113],"stream":[114,122],"anatomical":[116],"context":[117],"specialized":[120],"semantics.":[125],"ensure":[127],"high-fidelity":[128],"integration,":[129],"design":[131],"Semantic-Adaptive":[133],"Gated":[134],"Fusion":[135],"module,":[136],"which":[137],"dynamically":[138],"amplifies":[139],"subtle":[140],"lesion":[141],"signals":[142],"while":[143],"filtering":[144],"irrelevant":[145],"background":[146],"noise.":[147],"Furthermore,":[148],"introduce":[150],"Adaptive":[151],"embed":[156],"persistent":[157],"\"Vision":[158],"Anchors\"":[159],"integrating":[161],"fused":[162],"features":[164],"as":[165],"residual":[166],"biases":[167],"directly":[168],"intermediate":[170],"LLM":[171],"layers.":[172],"This":[173],"mechanism":[174],"creates":[175],"shortcut":[178],"forces":[180],"reasoning":[182],"stack":[183],"remain":[185],"strictly":[186],"grounded":[187],"evidence.":[190],"Extensive":[191],"experiments":[192],"across":[193],"four":[194],"benchmarks":[195],"demonstrate":[196],"our":[198],"model":[199],"consistently":[200],"outperforms":[201],"proprietary":[203],"systems.":[204],"EyExIn":[205],"significantly":[206],"enhances":[207],"embedding":[210],"achieves":[212],"state-of-the-art":[213],"precision":[214],"question":[218],"answering,":[219],"advancing":[220],"development":[222],"of":[223],"trustworthy":[224],"AI.":[226]},"counts_by_year":[],"updated_date":"2026-03-11T06:17:14.884878","created_date":"2026-03-11T00:00:00"}
