{"id":"https://openalex.org/W7153334072","doi":"https://doi.org/10.48550/arxiv.2604.08322","title":"Fundus-R1: Training a Fundus-Reading MLLM with Knowledge-Aware Reasoning on Public Data","display_name":"Fundus-R1: Training a Fundus-Reading MLLM with Knowledge-Aware Reasoning on Public Data","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7153334072","doi":"https://doi.org/10.48550/arxiv.2604.08322"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.08322","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08322","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.08322","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120419513","display_name":"Yuchuan Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Yuchuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034103782","display_name":"Qijie Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Qijie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Qian, Kaiheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Kaiheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133350996","display_name":"Jiazhen Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jiazhen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113398435","display_name":"Zijie Xin","orcid":"https://orcid.org/0000-0002-9220-8735"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin, Zijie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128634717","display_name":"Bangxiang Lan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lan, Bangxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133371066","display_name":"Jingyu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jingyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133341514","display_name":"Jianfeng Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Jianfeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5060270456","display_name":"Xirong Li","orcid":"https://orcid.org/0000-0002-0220-8310"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xirong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11438","display_name":"Retinal Imaging and Analysis","score":0.6119999885559082,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11438","display_name":"Retinal Imaging and Analysis","score":0.6119999885559082,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09309999644756317,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.042100001126527786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.6715999841690063},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5825999975204468},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.49459999799728394},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4936000108718872},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4108999967575073},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.39160001277923584}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7513999938964844},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.6715999841690063},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5825999975204468},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.541100025177002},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.49459999799728394},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4936000108718872},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45019999146461487},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4108999967575073},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.39160001277923584},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.3736000061035156},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2816999852657318},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.26989999413490295},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.26330000162124634},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2614000141620636},{"id":"https://openalex.org/C2987933465","wikidata":"https://www.wikidata.org/wiki/Q141130","display_name":"Image manipulation","level":3,"score":0.2558000087738037},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.08322","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08322","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.08322","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08322","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6535564661026001,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Fundus":[0,19],"imaging":[1],"such":[2],"as":[3],"CFP,":[4],"OCT":[5],"and":[6,17,195,209],"UWF":[7],"is":[8,39],"crucial":[9],"for":[10,141,224],"the":[11,37,97,122,159,179,216,222],"early":[12],"detection":[13],"of":[14,66,121,164,178],"retinal":[15],"anomalies":[16],"diseases.":[18],"image":[20,160],"understanding,":[21],"due":[22],"to":[23,35,40,92,104,158],"its":[24,205],"knowledge-intensive":[25],"nature,":[26],"poses":[27],"a":[28,42,63,101,106,138,155,172,210],"challenging":[29],"vision-language":[30],"task.":[31],"An":[32],"emerging":[33],"approach":[34],"addressing":[36],"task":[38],"post-train":[41],"generic":[43,156,206],"multimodal":[44],"large":[45],"language":[46],"model":[47],"(MLLM),":[48],"either":[49],"by":[50,55,154],"supervised":[51],"finetuning":[52],"(SFT)":[53],"or":[54],"reinforcement":[56],"learning":[57],"with":[58,70,126,171,229],"verifiable":[59],"rewards":[60],"(RLVR),":[61],"on":[62,188],"considerable":[64],"amount":[65],"in-house":[67],"samples":[68,77],"paired":[69],"high-quality":[71],"clinical":[72],"reports.":[73],"However,":[74],"these":[75],"valuable":[76],"are":[78,124,133],"not":[79,83],"publicly":[80,230],"accessible,":[81],"which":[82,110],"only":[84,127],"hinders":[85],"reproducibility":[86],"but":[87],"also":[88],"practically":[89],"limits":[90],"research":[91],"few":[93],"players.":[94],"To":[95],"overcome":[96],"barrier,":[98],"we":[99,111,136,168],"make":[100],"novel":[102],"attempt":[103],"train":[105],"reasoning-enhanced":[107],"fundus-reading":[108,190,227],"MLLM,":[109],"term":[112],"Fundus-R1,":[113],"using":[114,215],"exclusively":[115],"public":[116],"datasets,":[117],"wherein":[118],"over":[119],"94\\%":[120],"data":[123],"annotated":[125],"image-level":[128],"labels.":[129],"Our":[130],"technical":[131],"contributions":[132],"two-fold.":[134],"First,":[135],"propose":[137],"RAG-based":[139],"method":[140],"composing":[142],"image-specific,":[143],"knowledge-aware":[144],"reasoning":[145,181],"traces.":[146,218],"Such":[147],"auto-generated":[148],"traces":[149],"link":[150],"visual":[151],"findings":[152],"identified":[153],"MLLM":[157],"labels":[161],"in":[162,183],"terms":[163],"ophthalmic":[165],"knowledge.":[166],"Second,":[167],"enhance":[169],"RLVR":[170],"process":[173],"reward":[174],"that":[175,198],"encourages":[176],"self-consistency":[177],"generated":[180,217],"trace":[182],"each":[184],"rollout.":[185],"Extensive":[186],"experiments":[187],"three":[189],"benchmarks,":[191],"i.e.,":[192],"FunBench,":[193],"Omni-Fundus":[194],"GMAI-Fundus,":[196],"show":[197],"Fundus-R1":[199],"clearly":[200],"outperforms":[201],"multiple":[202],"baselines,":[203],"including":[204],"counterpart":[207],"(Qwen2.5-VL)":[208],"stronger":[211],"edition":[212],"post-trained":[213],"without":[214],"This":[219],"work":[220],"paves":[221],"way":[223],"training":[225],"powerful":[226],"MLLMs":[228],"available":[231],"data.":[232]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-11T00:00:00"}
