{"id":"https://openalex.org/W7123338676","doi":"https://doi.org/10.1109/cdc57313.2025.11313011","title":"Feasibility Informed Advantage Weighted Regression for Persistent Safety in Offline Reinforcement Learning","display_name":"Feasibility Informed Advantage Weighted Regression for Persistent Safety in Offline Reinforcement Learning","publication_year":2025,"publication_date":"2025-12-09","ids":{"openalex":"https://openalex.org/W7123338676","doi":"https://doi.org/10.1109/cdc57313.2025.11313011"},"language":null,"primary_location":{"id":"doi:10.1109/cdc57313.2025.11313011","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc57313.2025.11313011","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 64th Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109676911","display_name":"Prajwal Koirala","orcid":null},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Prajwal Koirala","raw_affiliation_strings":["Iowa State University,Ames,IA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Iowa State University,Ames,IA","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031212270","display_name":"Zhanhong Jiang","orcid":"https://orcid.org/0000-0001-5363-7898"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhanhong Jiang","raw_affiliation_strings":["Iowa State University,Ames,IA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Iowa State University,Ames,IA","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111030835","display_name":"Soumik Sarkar","orcid":"https://orcid.org/0000-0001-5766-2871"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Soumik Sarkar","raw_affiliation_strings":["Iowa State University,Ames,IA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Iowa State University,Ames,IA","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5122847164","display_name":"Cody Fleming","orcid":null},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cody Fleming","raw_affiliation_strings":["Iowa State University,Ames,IA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Iowa State University,Ames,IA","institution_ids":["https://openalex.org/I173911158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.79230319,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5837","last_page":"5844"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6013000011444092,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6013000011444092,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.2232999950647354,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.027699999511241913,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7773000001907349},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6258999705314636},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4756999909877777},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.35249999165534973},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.3375000059604645},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.3312000036239624}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7773000001907349},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.704200029373169},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6258999705314636},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4756999909877777},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39879998564720154},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38499999046325684},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.35249999165534973},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3375000059604645},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.3312000036239624},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.29910001158714294},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2863999903202057},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.28060001134872437},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2583000063896179},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.25459998846054077},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2526000142097473}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cdc57313.2025.11313011","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc57313.2025.11313011","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 64th Conference on Decision and Control (CDC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7756056785583496}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Safe":[0],"offline":[1,19,87],"reinforcement":[2],"learning":[3,169],"aims":[4],"to":[5,14,44,133],"learn":[6],"policies":[7,170],"that":[8,67,119,141,158],"maximize":[9],"cumulative":[10],"rewards":[11],"while":[12,125],"adhering":[13],"safety":[15,28,45,70,121,165],"constraints,":[16],"using":[17],"only":[18],"data":[20],"for":[21,86,103],"training.":[22,106],"A":[23],"key":[24],"challenge":[25],"is":[26],"balancing":[27,164],"and":[29,39,166],"performance,":[30],"particularly":[31],"when":[32],"the":[33,120,172],"policy":[34,79,91,95],"encounters":[35],"out-of-distribution":[36],"(OOD)":[37],"states":[38],"actions,":[40],"which":[41],"can":[42],"lead":[43],"violations":[46],"or":[47],"overly":[48],"conservative":[49],"behavior":[50],"during":[51],"deployment.":[52],"To":[53],"address":[54,134],"these":[55],"challenges,":[56],"we":[57,129],"introduce":[58],"Feasibility":[59],"Informed":[60],"Advantage":[61,113],"Weighted":[62,114],"Actor-Critic":[63],"(FAWAC),":[64],"a":[65,109,131,135],"method":[66],"prioritizes":[68],"persistent":[69],"in":[71,93,168],"constrained":[72,104],"Markov":[73],"decision":[74],"processes":[75],"(CMDPs).":[76],"FAWAC":[77,117,159],"formulates":[78],"optimization":[80],"with":[81],"feasibility":[82],"conditions":[83],"derived":[84],"specifically":[85],"datasets,":[88],"enabling":[89],"safe":[90],"updates":[92],"non-parametric":[94],"space,":[96],"followed":[97],"by":[98],"projection":[99],"into":[100,112],"parametric":[101],"space":[102],"actor":[105],"By":[107],"incorporating":[108],"cost-advantage":[110],"term":[111],"Regression":[115],"(AWR),":[116],"ensures":[118],"constraints":[122],"are":[123,147],"respected":[124],"maximizing":[126],"performance.":[127],"Additionally,":[128],"propose":[130],"strategy":[132],"more":[136],"challenging":[137],"class":[138],"of":[139],"problems":[140],"involves":[142],"tempting":[143],"datasets":[144],"where":[145],"trajectories":[146],"predominantly":[148],"high-rewarded":[149],"but":[150],"unsafe.":[151],"Empirical":[152],"evaluations":[153],"on":[154],"standard":[155],"benchmarks":[156],"demonstrate":[157],"achieves":[160],"strong":[161],"results,":[162],"effectively":[163],"performance":[167],"from":[171],"static":[173],"datasets.":[174]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-14T00:00:00"}
