{"id":"https://openalex.org/W7153234898","doi":"https://doi.org/10.48550/arxiv.2604.08326","title":"ProMedical: Hierarchical Fine-Grained Criteria Modeling for Medical LLM Alignment via Explicit Injection","display_name":"ProMedical: Hierarchical Fine-Grained Criteria Modeling for Medical LLM Alignment via Explicit Injection","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7153234898","doi":"https://doi.org/10.48550/arxiv.2604.08326"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.08326","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08326","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.08326","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133328844","display_name":"He Geng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Geng, He","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133332893","display_name":"Yangmin Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Yangmin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133326000","display_name":"Lixian Lai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lai, Lixian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071180904","display_name":"Qianyun Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Qianyun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133367580","display_name":"Hui Ling Chu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chu, Hui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133381356","display_name":"Zhiyang He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Zhiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020380302","display_name":"Jiaxue Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Jiaxue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5110078725","display_name":"Xiaodong Tao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Xiaodong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5133328844"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.8937000036239624,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.8937000036239624,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.03550000116229057,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0142000000923872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.612500011920929},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.49639999866485596},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.48899999260902405},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.4442000091075897},{"id":"https://openalex.org/keywords/subject-matter-expert","display_name":"Subject-matter expert","score":0.3824999928474426},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.3675999939441681},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.3257000148296356}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7466999888420105},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.612500011920929},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.49639999866485596},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.48899999260902405},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.4442000091075897},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42399999499320984},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4117000102996826},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.3824999928474426},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.3675999939441681},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.3257000148296356},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.31619998812675476},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31209999322891235},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.30660000443458557},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2872999906539917},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.08326","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08326","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.08326","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08326","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/5","display_name":"Gender equality","score":0.40223386883735657}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Aligning":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"with":[5,61],"high-stakes":[6],"medical":[7,59,180],"standards":[8],"remains":[9],"a":[10,37,50,54,77,109],"significant":[11],"challenge,":[12],"primarily":[13],"due":[14],"to":[15,75,154,160,174],"the":[16,23,70,123,149],"dissonance":[17],"between":[18],"coarse-grained":[19],"preference":[20],"signals":[21],"and":[22,138,172],"complex,":[24],"multi-dimensional":[25,78],"nature":[26],"of":[27],"clinical":[28,44],"protocols.":[29],"To":[30,101],"bridge":[31],"this":[32,66,104],"gap,":[33],"we":[34,68,106],"introduce":[35],"ProMedical,":[36],"unified":[38],"alignment":[39],"framework":[40],"grounded":[41],"in":[42,178],"fine-grained":[43],"criteria.":[45],"We":[46,165],"first":[47],"construct":[48],"ProMedical-Preference-50k,":[49],"dataset":[51],"generated":[52],"via":[53,127],"human-in-the-loop":[55],"pipeline":[56],"that":[57,121],"augments":[58],"instructions":[60],"rigorous,":[62],"physician-derived":[63],"rubrics.":[64],"Leveraging":[65],"corpus,":[67],"propose":[69],"Explicit":[71],"Criteria":[72],"Injection":[73],"paradigm":[74],"train":[76],"reward":[79,84,170],"model.":[80],"Unlike":[81],"traditional":[82],"scalar":[83],"models,":[85,171],"our":[86,168],"approach":[87],"explicitly":[88],"disentangles":[89],"safety":[90,139],"constraints":[91],"from":[92],"general":[93],"proficiency,":[94],"enabling":[95],"precise":[96],"guidance":[97],"during":[98],"reinforcement":[99],"learning.":[100],"rigorously":[102],"validate":[103],"framework,":[105],"establish":[107],"ProMedical-Bench,":[108],"held-out":[110],"evaluation":[111],"suite":[112],"anchored":[113],"by":[114,136,141],"double-blind":[115],"expert":[116],"adjudication.":[117],"Empirical":[118],"evaluations":[119],"demonstrate":[120],"optimizing":[122],"Qwen3-8B":[124],"base":[125],"model":[126],"ProMedical-RM-guided":[128],"GRPO":[129],"yields":[130],"substantial":[131],"gains,":[132],"improving":[133],"overall":[134],"accuracy":[135],"22.3%":[137],"compliance":[140],"21.7%,":[142],"effectively":[143],"rivaling":[144],"proprietary":[145],"frontier":[146],"models.":[147],"Furthermore,":[148],"aligned":[150],"policy":[151],"generalizes":[152],"robustly":[153],"external":[155],"benchmarks,":[156],"demonstrating":[157],"performance":[158],"comparable":[159],"state-of-the-art":[161],"models":[162],"on":[163],"UltraMedical.":[164],"publicly":[166],"release":[167],"datasets,":[169],"benchmarks":[173],"facilitate":[175],"reproducible":[176],"research":[177],"safety-aware":[179],"alignment.":[181]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-11T00:00:00"}
