{"id":"https://openalex.org/W7127602622","doi":"https://doi.org/10.48550/arxiv.2602.03305","title":"medR: Reward Engineering for Clinical Offline Reinforcement Learning via Tri-Drive Potential Functions","display_name":"medR: Reward Engineering for Clinical Offline Reinforcement Learning via Tri-Drive Potential Functions","publication_year":2026,"publication_date":"2026-02-03","ids":{"openalex":"https://openalex.org/W7127602622","doi":"https://doi.org/10.48550/arxiv.2602.03305"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.03305","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039482287","display_name":"Qianyi Xu","orcid":"https://orcid.org/0009-0009-5982-3953"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Qianyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122969893","display_name":"Gousia Habib","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Habib, Gousia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125080551","display_name":"Feng Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Feng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100954951","display_name":"Yanrui Du","orcid":"https://orcid.org/0000-0002-6821-7690"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Yanrui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Chen, Zhihui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhihui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063912611","display_name":"Swapnil Mishra","orcid":"https://orcid.org/0000-0002-8759-5902"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mishra, Swapnil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108815886","display_name":"Dilruk Perera","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Perera, Dilruk","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125077990","display_name":"Mengling Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Mengling","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5039482287"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.38749998807907104,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.38749998807907104,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.09830000251531601,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.0877000018954277,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7957000136375427},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.6628000140190125},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6107000112533569},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.522599995136261},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5189999938011169},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.3950999975204468}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7957000136375427},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7311999797821045},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.6628000140190125},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6107000112533569},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6068000197410583},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.522599995136261},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5189999938011169},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5092999935150146},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.3950999975204468},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.3176000118255615},{"id":"https://openalex.org/C2780102126","wikidata":"https://www.wikidata.org/wiki/Q10928179","display_name":"Online and offline","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2563000023365021},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2531999945640564}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.03305","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.03305","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.03305","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.03305","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"offers":[3],"a":[4],"powerful":[5],"framework":[6,112],"for":[7,66,119],"optimizing":[8],"dynamic":[9],"treatment":[10],"regimes":[11],"(DTRs).":[12],"However,":[13],"clinical":[14],"RL":[15],"is":[16],"fundamentally":[17],"bottlenecked":[18],"by":[19],"reward":[20,68,75,101,117],"engineering:":[21],"the":[22,74,99,114,125,128],"challenge":[23],"of":[24,81,116,127],"defining":[25],"signals":[26],"that":[27,46],"safely":[28],"and":[29,70,87,97],"effectively":[30],"guide":[31],"policy":[32],"learning":[33],"in":[34],"complex,":[35],"sparse":[36],"offline":[37,67],"environments.":[38],"Existing":[39],"approaches":[40],"often":[41],"rely":[42],"on":[43],"manual":[44],"heuristics":[45],"fail":[47],"to":[48,94,104],"generalize":[49],"across":[50],"diverse":[51],"pathologies.":[52],"To":[53],"address":[54],"this,":[55],"we":[56],"propose":[57],"an":[58],"automated":[59],"pipeline":[60],"leveraging":[61],"Large":[62],"Language":[63],"Models":[64],"(LLMs)":[65],"design":[69,115],"verification.":[71],"We":[72,89],"formulate":[73],"function":[76],"using":[77],"potential":[78],"functions":[79,118],"consisted":[80],"three":[82],"core":[83],"components:":[84],"survival,":[85],"confidence,":[86],"competence.":[88],"further":[90],"introduce":[91],"quantitative":[92],"metrics":[93],"rigorously":[95],"evaluate":[96],"select":[98],"optimal":[100],"structure":[102],"prior":[103],"deployment.":[105],"By":[106],"integrating":[107],"LLM-driven":[108],"domain":[109],"knowledge,":[110],"our":[111],"automates":[113],"specific":[120],"diseases":[121],"while":[122],"significantly":[123],"enhancing":[124],"performance":[126],"resulting":[129],"policies.":[130]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-02-06T00:00:00"}
