{"id":"https://openalex.org/W7137895137","doi":"https://doi.org/10.1609/aaai.v40i14.38141","title":"MedReasoner: Reinforcement Learning Drives Reasoning Grounding from Clinical Thought to Pixel-Level Precision","display_name":"MedReasoner: Reinforcement Learning Drives Reasoning Grounding from Clinical Thought to Pixel-Level Precision","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137895137","doi":"https://doi.org/10.1609/aaai.v40i14.38141"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i14.38141","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38141","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i14.38141","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zhonghao Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhonghao Yan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Muxi Diao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muxi Diao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yuxuan Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuxuan Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ruoyan Jing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruoyan Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jiayuan Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiayuan Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Kaizhou Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaizhou Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Lele Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lele Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yanxi Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yanxi Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Kongming Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kongming Liang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Zhanyu Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhanyu Ma","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14321399,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"14","first_page":"11577","last_page":"11585"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.567799985408783,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.567799985408783,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.16130000352859497,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.05959999933838844,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5746999979019165},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5740000009536743},{"id":"https://openalex.org/keywords/semantic-reasoner","display_name":"Semantic reasoner","score":0.5600000023841858},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5314000248908997},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.48969998955726624},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.4569999873638153},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4368000030517578},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.41110000014305115},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.3774000108242035}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6531000137329102},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.649399995803833},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5746999979019165},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5740000009536743},{"id":"https://openalex.org/C9616225","wikidata":"https://www.wikidata.org/wiki/Q3929429","display_name":"Semantic reasoner","level":2,"score":0.5600000023841858},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5314000248908997},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.48969998955726624},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.4569999873638153},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4368000030517578},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.41110000014305115},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3944999873638153},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35670000314712524},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3434999883174896},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.3278999924659729},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.313400000333786},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.3021000027656555},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.30169999599456787},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.288100004196167},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.2676999866962433},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.25040000677108765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i14.38141","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38141","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i14.38141","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38141","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.43227288126945496,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Accurately":[0],"grounding":[1],"regions":[2],"of":[3,83,162],"interest":[4],"(ROIs)":[5],"is":[6,121],"critical":[7],"for":[8,165],"diagnosis":[9],"and":[10,74,93,101,141,150],"treatment":[11],"planning":[12],"in":[13,49],"medical":[14,167],"imaging.":[15],"While":[16],"multimodal":[17],"large":[18],"language":[19],"models":[20],"(MLLMs)":[21],"combine":[22],"visual":[23],"perception":[24],"with":[25,36,123,136],"natural":[26],"language,":[27],"current":[28],"medical-grounding":[29],"pipelines":[30],"still":[31],"rely":[32],"on":[33,148],"supervised":[34],"fine-tuning":[35],"explicit":[37],"spatial":[38,132],"hints,":[39],"making":[40],"them":[41],"ill-equipped":[42],"to":[43,154],"handle":[44],"the":[45,159],"implicit":[46,90],"queries":[47,92],"common":[48],"clinical":[50,72,91,156],"practice.":[51],"This":[52],"work":[53],"makes":[54],"three":[55],"core":[56],"contributions.":[57],"We":[58],"first":[59],"define":[60],"Unified":[61],"Medical":[62],"Reasoning":[63],"Grounding":[64],"(UMRG),":[65],"a":[66,81,109,127],"novel":[67],"vision\u2013language":[68],"task":[69],"that":[70,112],"demands":[71],"reasoning":[73,94,115],"pixel-level":[75,87],"grounding.":[76,168],"Second,":[77],"we":[78,106],"release":[79],"U-MRG-14K,":[80],"dataset":[82],"14K":[84],"samples":[85],"featuring":[86],"masks":[88],"alongside":[89],"traces,":[95],"spanning":[96],"10":[97],"modalities,":[98],"15":[99],"super-categories,":[100],"108":[102],"specific":[103],"categories.":[104],"Finally,":[105],"introduce":[107],"MedReasoner,":[108],"modular":[110],"framework":[111],"distinctly":[113],"separates":[114],"from":[116],"segmentation:":[117],"an":[118],"MLLM":[119],"reasoner":[120],"optimized":[122],"reinforcement":[124,163],"learning,":[125],"while":[126],"frozen":[128],"segmentation":[129],"expert":[130],"converts":[131],"prompts":[133],"into":[134],"masks,":[135],"alignment":[137],"achieved":[138],"through":[139],"format":[140],"accuracy":[142],"rewards.":[143],"MedReasoner":[144],"achieves":[145],"state-of-the-art":[146],"performance":[147],"U-MRG-14K":[149],"demonstrates":[151],"strong":[152],"generalization":[153],"unseen":[155],"queries,":[157],"underscoring":[158],"significant":[160],"promise":[161],"learning":[164],"interpretable":[166]},"counts_by_year":[],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2026-02-22T00:00:00"}
