{"id":"https://openalex.org/W4385568106","doi":"https://doi.org/10.1145/3580305.3599800","title":"Deep Offline Reinforcement Learning for Real-world Treatment Optimization Applications","display_name":"Deep Offline Reinforcement Learning for Real-world Treatment Optimization Applications","publication_year":2023,"publication_date":"2023-08-04","ids":{"openalex":"https://openalex.org/W4385568106","doi":"https://doi.org/10.1145/3580305.3599800"},"language":"en","primary_location":{"id":"doi:10.1145/3580305.3599800","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599800","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599800","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599800","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103150766","display_name":"Mila Nambiar","orcid":"https://orcid.org/0000-0001-9445-6516"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Mila Nambiar","raw_affiliation_strings":["Institute for Infocomm Research (I2R), A*STAR, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research (I2R), A*STAR, Singapore, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101775931","display_name":"Supriyo Ghosh","orcid":"https://orcid.org/0000-0001-7275-3296"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Supriyo Ghosh","raw_affiliation_strings":["Institute for Infocomm Research (I2R), A*STAR, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research (I2R), A*STAR, Singapore, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044915296","display_name":"Priscilla Ong","orcid":"https://orcid.org/0009-0007-0598-8571"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Priscilla Ong","raw_affiliation_strings":["Institute for Infocomm Research (I2R), A*STAR, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research (I2R), A*STAR, Singapore, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003191938","display_name":"Yu En Chan","orcid":"https://orcid.org/0000-0001-7419-0327"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yu En Chan","raw_affiliation_strings":["Institute for Infocomm Research (I2R), A*STAR, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research (I2R), A*STAR, Singapore, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060561623","display_name":"Yong Mong Bee","orcid":"https://orcid.org/0000-0002-5482-2646"},"institutions":[{"id":"https://openalex.org/I2251586001","display_name":"Singapore General Hospital","ror":"https://ror.org/036j6sg82","country_code":"SG","type":"healthcare","lineage":["https://openalex.org/I2251586001"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yong Mong Bee","raw_affiliation_strings":["Singapore General Hospital, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Singapore General Hospital, Singapore, Singapore","institution_ids":["https://openalex.org/I2251586001"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025934400","display_name":"Pavitra Krishnaswamy","orcid":"https://orcid.org/0000-0001-5893-4306"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Pavitra Krishnaswamy","raw_affiliation_strings":["Institute for Infocomm Research (I2R), A*STAR, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research (I2R), A*STAR, Singapore, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103150766"],"corresponding_institution_ids":["https://openalex.org/I115228651","https://openalex.org/I3005327000"],"apc_list":null,"apc_paid":null,"fwci":3.2718,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.93555146,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4673","last_page":"4684"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10821","display_name":"Cardiovascular Function and Risk Factors","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/2705","display_name":"Cardiology and Cardiovascular Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10218","display_name":"Sepsis Diagnosis and Treatment","score":0.9771000146865845,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8505644798278809},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7608233690261841},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6698057651519775},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5916052460670471},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5806254148483276},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.437843918800354}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8505644798278809},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7608233690261841},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6698057651519775},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5916052460670471},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5806254148483276},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.437843918800354}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3580305.3599800","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599800","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599800","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3580305.3599800","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599800","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599800","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5099999904632568}],"awards":[],"funders":[{"id":"https://openalex.org/F4320316262","display_name":"Duke-NUS Medical School","ror":"https://ror.org/02j1m6098"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4385568106.pdf","grobid_xml":"https://content.openalex.org/works/W4385568106.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W1917795079","https://openalex.org/W2073241381","https://openalex.org/W2094509095","https://openalex.org/W2104933073","https://openalex.org/W2110020655","https://openalex.org/W2110660649","https://openalex.org/W2396881363","https://openalex.org/W2535327675","https://openalex.org/W2746553466","https://openalex.org/W2766329790","https://openalex.org/W2766447205","https://openalex.org/W2898645858","https://openalex.org/W2947150733","https://openalex.org/W2991355586","https://openalex.org/W2991641017","https://openalex.org/W3026443214","https://openalex.org/W3028766998","https://openalex.org/W3105102281","https://openalex.org/W3112765753","https://openalex.org/W3126468206","https://openalex.org/W3138013258","https://openalex.org/W3187059224","https://openalex.org/W4239958308","https://openalex.org/W4245694139","https://openalex.org/W4247993926","https://openalex.org/W4301178047","https://openalex.org/W6777656069","https://openalex.org/W6922016914"],"related_works":["https://openalex.org/W4380075502","https://openalex.org/W4223943233","https://openalex.org/W4312200629","https://openalex.org/W4360585206","https://openalex.org/W4364306694","https://openalex.org/W4380086463","https://openalex.org/W4225161397","https://openalex.org/W3014300295","https://openalex.org/W3164822677","https://openalex.org/W2795261237"],"abstract_inverted_index":{"There":[0],"is":[1,47],"increasing":[2],"interest":[3],"in":[4,12,80,86,102,194,199],"data-driven":[5],"approaches":[6,105],"for":[7,153],"recommending":[8],"optimal":[9],"treatment":[10,58,157],"strategies":[11],"many":[13],"chronic":[14],"disease":[15],"management":[16],"and":[17,35,49,116,131,155,169,174,181,198,204],"critical":[18],"care":[19],"applications.":[20],"Reinforcement":[21],"learning":[22],"methods":[23,64],"are":[24,75],"well-suited":[25],"to":[26,77,106,121,137,159],"this":[27,52,125],"sequential":[28],"decision-making":[29],"problem,":[30],"but":[31],"must":[32],"be":[33,122],"trained":[34],"evaluated":[36],"exclusively":[37],"on":[38,149],"retrospective":[39,114],"medical":[40],"record":[41],"datasets":[42],"as":[43,90],"direct":[44],"online":[45],"exploration":[46],"unsafe":[48],"infeasible.":[50],"Despite":[51],"requirement,":[53],"the":[54,113,163],"vast":[55],"majority":[56],"of":[57,162,179],"optimization":[59,158],"studies":[60],"use":[61],"off-policy":[62,168],"RL":[63,143,171],"(e.g.,":[65],"Double":[66],"Deep":[67],"Q":[68],"Networks":[69],"(DDQN)":[70],"or":[71],"its":[72],"variants)":[73],"that":[74,187],"known":[76],"perform":[78,146],"poorly":[79],"purely":[81],"offline":[82,87,142,170],"settings.":[83],"Recent":[84],"advances":[85],"RL,":[88],"such":[89],"Conservative":[91],"Q-Learning":[92],"(CQL),":[93],"offer":[94],"a":[95,129,177],"suitable":[96],"alternative.":[97],"But":[98],"there":[99],"remain":[100],"challenges":[101],"adapting":[103],"these":[104],"real-world":[107,151],"applications":[108],"where":[109],"suboptimal":[110],"examples":[111],"dominate":[112],"dataset":[115],"strict":[117],"safety":[118,205],"constraints":[119],"need":[120],"satisfied.":[123],"In":[124],"work,":[126],"we":[127,185],"introduce":[128],"practical":[130],"theoretically":[132],"grounded":[133],"transition":[134],"sampling":[135],"approach":[136,165,190],"address":[138],"action":[139],"imbalance":[140],"during":[141],"training.":[144],"We":[145],"extensive":[147],"experiments":[148],"two":[150],"tasks":[152],"diabetes":[154],"sepsis":[156],"compare":[160],"performance":[161],"proposed":[164,189],"against":[166],"prominent":[167],"baselines":[172],"(DDQN":[173],"CQL).":[175],"Across":[176],"range":[178],"principled":[180],"clinically":[182],"relevant":[183,202],"metrics,":[184],"show":[186],"our":[188],"enables":[191],"substantial":[192],"improvements":[193],"expected":[195],"health":[196],"outcomes":[197],"consistency":[200],"with":[201],"practice":[203],"guidelines.":[206]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
