{"id":"https://openalex.org/W7160294315","doi":"https://doi.org/10.48550/arxiv.2605.02552","title":"Recurrent Deep Reinforcement Learning for Chemotherapy Control under Partial Observability","display_name":"Recurrent Deep Reinforcement Learning for Chemotherapy Control under Partial Observability","publication_year":2026,"publication_date":"2026-05-04","ids":{"openalex":"https://openalex.org/W7160294315","doi":"https://doi.org/10.48550/arxiv.2605.02552"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.02552","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.02552","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.02552","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135361631","display_name":"Firas Mohamed Elamine Kiram","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kiram, Firas Mohamed Elamine","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007176011","display_name":"Imane Youkana","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Youkana, Imane","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102720365","display_name":"Rachida Saouli","orcid":"https://orcid.org/0000-0002-4299-2664"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saouli, Rachida","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026617079","display_name":"Gian Antonio Susto","orcid":"https://orcid.org/0000-0001-5739-9639"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Susto, Gian Antonio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5052087453","display_name":"La\u00efd Kahloul","orcid":"https://orcid.org/0000-0002-9739-7715"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kahloul, Laid","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11829","display_name":"Mathematical Biology Tumor Growth","score":0.9041000008583069,"subfield":{"id":"https://openalex.org/subfields/2611","display_name":"Modeling and Simulation"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11829","display_name":"Mathematical Biology Tumor Growth","score":0.9041000008583069,"subfield":{"id":"https://openalex.org/subfields/2611","display_name":"Modeling and Simulation"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11235","display_name":"Statistical Methods in Clinical Trials","score":0.009399999864399433,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10621","display_name":"Gene Regulatory Network Analysis","score":0.005799999926239252,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/observability","display_name":"Observability","score":0.9430999755859375},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6256999969482422},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5576000213623047},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5307000279426575},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.5027999877929688},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.45820000767707825},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.4546000063419342}],"concepts":[{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.9430999755859375},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6256999969482422},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5576000213623047},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5307000279426575},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5209000110626221},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.5027999877929688},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.45820000767707825},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.4546000063419342},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.42010000348091125},{"id":"https://openalex.org/C3020402766","wikidata":"https://www.wikidata.org/wiki/Q104376712","display_name":"Prior information","level":2,"score":0.33889999985694885},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33739998936653137},{"id":"https://openalex.org/C2776694085","wikidata":"https://www.wikidata.org/wiki/Q974135","display_name":"Chemotherapy","level":2,"score":0.3330000042915344},{"id":"https://openalex.org/C113336015","wikidata":"https://www.wikidata.org/wiki/Q574010","display_name":"Complete information","level":2,"score":0.3086000084877014},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3059000074863434},{"id":"https://openalex.org/C77350462","wikidata":"https://www.wikidata.org/wiki/Q1125472","display_name":"Confounding","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.25609999895095825}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.02552","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.02552","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.02552","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.02552","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.49988481402397156,"display_name":"Good health and well-being","id":"https://metadata.un.org/sdg/3"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Chemotherapy":[0],"dose":[1],"optimization":[2],"can":[3,47],"be":[4],"formulated":[5],"as":[6],"a":[7,35,59],"dynamic":[8],"treatment":[9],"regime,":[10],"requiring":[11],"sequential":[12],"decisions":[13],"under":[14,51,120,130],"uncertainty":[15,100],"that":[16,145],"must":[17],"balance":[18],"tumor":[19,136],"suppression":[20,137],"against":[21,84],"toxicity.":[22],"However,":[23],"most":[24],"reinforcement":[25],"learning":[26],"approaches":[27],"assume":[28],"full":[29,121],"observability":[30,122],"of":[31],"the":[32,72],"patient":[33],"state,":[34],"condition":[36],"rarely":[37],"met":[38],"in":[39],"clinical":[40],"practice.":[41],"We":[42],"investigate":[43],"whether":[44],"memory-augmented":[45],"policies":[46,147],"improve":[48],"chemotherapy":[49],"control":[50],"partial":[52,131],"observability.":[53],"To":[54],"this":[55],"end,":[56],"we":[57],"employ":[58],"recurrent":[60,82],"TD3-based":[61],"approach":[62],"with":[63,133],"separate":[64],"LSTM":[65],"actor-critic":[66],"networks":[67],"and":[68,80,87,91,101,104,126,138],"evaluate":[69],"it":[70],"on":[71],"AhnChemoEnv":[73],"benchmark":[74],"from":[75,109],"DTR-Bench,":[76],"considering":[77],"both":[78],"off-policy":[79],"on-policy":[81],"architectures":[83],"feed-forward":[85],"TD3":[86],"Soft":[88],"Actor-Critic.":[89],"Pharmacokinetic":[90],"pharmacodynamic":[92],"variability":[93],"are":[94,148],"held":[95],"fixed":[96],"to":[97,105],"isolate":[98],"hidden-state":[99],"observation":[102],"noise":[103],"avoid":[106],"confounding":[107],"effects":[108],"inter-patient":[110],"variability.":[111],"Across":[112],"ten":[113],"random":[114],"seeds,":[115],"recurrence":[116],"yields":[117],"modest":[118],"benefit":[119],"but":[123],"substantially":[124],"stronger":[125],"more":[127,134],"stable":[128],"performance":[129],"observability,":[132],"consistent":[135],"improved":[139],"normal-cell":[140],"preservation.":[141],"These":[142],"findings":[143],"indicate":[144],"memory-based":[146],"particularly":[149],"beneficial":[150],"when":[151],"clinically":[152],"relevant":[153],"state":[154],"information":[155],"is":[156],"incomplete":[157],"or":[158],"noisy.":[159]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-06T00:00:00"}
