{"id":"https://openalex.org/W4408564563","doi":"https://doi.org/10.1109/tcyb.2025.3542223","title":"Feasible Policy Iteration With Guaranteed Safe Exploration","display_name":"Feasible Policy Iteration With Guaranteed Safe Exploration","publication_year":2025,"publication_date":"2025-03-18","ids":{"openalex":"https://openalex.org/W4408564563","doi":"https://doi.org/10.1109/tcyb.2025.3542223","pmid":"https://pubmed.ncbi.nlm.nih.gov/40100688"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2025.3542223","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2025.3542223","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028357638","display_name":"Yuhang Zhang","orcid":"https://orcid.org/0009-0006-4272-5730"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuhang Zhang","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility, Tsinghua University, Beijing, China","School of Vehicle and Mobility, State Key Laboratory of Intelligent Green Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-4272-5730","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Vehicle and Mobility, State Key Laboratory of Intelligent Green Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101620888","display_name":"Yujie Yang","orcid":"https://orcid.org/0000-0001-7222-0019"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujie Yang","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility, Tsinghua University, Beijing, China","School of Vehicle and Mobility, State Key Laboratory of Intelligent Green Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7222-0019","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Vehicle and Mobility, State Key Laboratory of Intelligent Green Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100747108","display_name":"Shengbo Eben Li","orcid":"https://orcid.org/0000-0003-4923-3633"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengbo Eben Li","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility, Tsinghua University, Beijing, China","School of Vehicle and Mobility, State Key Laboratory of Intelligent Green Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4923-3633","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Vehicle and Mobility, State Key Laboratory of Intelligent Green Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083537762","display_name":"Yao Lyu","orcid":"https://orcid.org/0000-0003-1539-472X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Lyu","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility, Tsinghua University, Beijing, China","School of Vehicle and Mobility, State Key Laboratory of Intelligent Green Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-1539-472X","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Vehicle and Mobility, State Key Laboratory of Intelligent Green Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067909017","display_name":"Jingliang Duan","orcid":"https://orcid.org/0000-0002-3697-1576"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingliang Duan","raw_affiliation_strings":["School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3697-1576","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042575144","display_name":"Zhilong Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhilong Zheng","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility, Tsinghua University, Beijing, China","School of Vehicle and Mobility, State Key Laboratory of Intelligent Green Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Vehicle and Mobility, State Key Laboratory of Intelligent Green Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045712695","display_name":"Dezhao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dezhao Zhang","raw_affiliation_strings":["Beijing Idriverplus Technology Company Ltd., Beijing, China","Beijing Idriverplus Technology Company Ltd, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Idriverplus Technology Company Ltd., Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Beijing Idriverplus Technology Company Ltd, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5028357638"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06842752,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"55","issue":"5","first_page":"2327","last_page":"2340"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11801","display_name":"Reservoir Engineering and Simulation Methods","score":0.9538000226020813,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11801","display_name":"Reservoir Engineering and Simulation Methods","score":0.9538000226020813,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5036415457725525},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3864242434501648},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22157365083694458}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5036415457725525},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3864242434501648},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22157365083694458}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2025.3542223","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2025.3542223","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:40100688","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40100688","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1500936459","display_name":null,"funder_award_id":"2022YFB2502901","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320330357","display_name":"Tsinghua Initiative Scientific Research Program","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1990022107","https://openalex.org/W2145339207","https://openalex.org/W2325770295","https://openalex.org/W2575705757","https://openalex.org/W2586823359","https://openalex.org/W2892521964","https://openalex.org/W2966735560","https://openalex.org/W2968945909","https://openalex.org/W2982316857","https://openalex.org/W2991391803","https://openalex.org/W3007792336","https://openalex.org/W3015082424","https://openalex.org/W3017203298","https://openalex.org/W3089266942","https://openalex.org/W3127910092","https://openalex.org/W4224216677","https://openalex.org/W4312761761","https://openalex.org/W4317553716","https://openalex.org/W4362650413","https://openalex.org/W4366158867","https://openalex.org/W4386285856","https://openalex.org/W4390939851","https://openalex.org/W4391667216","https://openalex.org/W4394006698","https://openalex.org/W4406983280","https://openalex.org/W6687063787","https://openalex.org/W6737893269","https://openalex.org/W6747473740","https://openalex.org/W6747790125","https://openalex.org/W6780587392","https://openalex.org/W6799934450","https://openalex.org/W6804655846","https://openalex.org/W6838566501","https://openalex.org/W6852522985","https://openalex.org/W6857325571"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Safety":[0],"guarantee":[1,65],"is":[2,106],"an":[3],"important":[4],"topic":[5],"when":[6],"training":[7,234],"real-world":[8,77,264],"tasks":[9],"with":[10,115,196],"reinforcement":[11],"learning":[12],"(RL).":[13],"During":[14],"online":[15,69,268],"environmental":[16,89],"exploration,":[17,70],"any":[18],"constraint":[19,51,72,112,229,245],"violation":[20,230],"can":[21,64,180],"lead":[22],"to":[23,29,81,143,160,224,247],"significant":[24],"property":[25,140],"damage":[26],"and":[27,141,186,211],"risks":[28],"personnel.":[30],"Existing":[31],"safe":[32,205],"RL":[33],"methods":[34],"either":[35],"exclusively":[36],"address":[37],"safety":[38,67,84],"concerns":[39],"after":[40],"reaching":[41],"optimality":[42],"or":[43],"incorporate":[44],"a":[45,58,109,255],"certain":[46],"degree":[47],"of":[48,99,121,201,244,270],"tolerance":[49],"for":[50,258],"violations":[52,73,246],"during":[53,68],"training.":[54],"This":[55,103],"article":[56],"proposes":[57],"feasible":[59,97,104,123,134,164,176,189,260],"policy":[60,169,261],"iteration":[61,262],"framework":[62],"that":[63,218],"absolute":[66,83],"i.e.,":[71,154],"never":[74],"happen":[75],"in":[76,86,263],"interactions.":[78],"The":[79],"key":[80],"maintaining":[82,227],"lies":[85],"confining":[87],"the":[88,96,100,118,122,126,130,133,155,163,167,188,197,232,238,249,267],"exploration":[90],"at":[91],"each":[92],"step":[93],"always":[94],"within":[95,162,173],"region":[98,105,124,135,177],"current":[101],"policy.":[102],"described":[107],"by":[108],"newly":[110],"defined":[111],"decay":[113],"function":[114],"uncertainty,":[116],"ensuring":[117],"forward":[119],"invariance":[120],"under":[125],"worst":[127],"case.":[128],"Within":[129],"proposed":[131],"framework,":[132],"maintains":[136],"its":[137,144,174],"monotonic":[138],"expanding":[139],"converges":[142],"maximum":[145],"extent,":[146],"even":[147],"though":[148],"only":[149,157],"local":[150],"samples":[151,161],"are":[152,194],"available,":[153],"agent":[156],"has":[158],"access":[159],"region.":[165,190],"Meanwhile,":[166],"trained":[168],"also":[170],"improves":[171],"monotonically":[172],"corresponding":[175],"if":[178],"one":[179],"use":[181],"different":[182],"updating":[183],"rules":[184],"inside":[185],"outside":[187],"Finally,":[191],"practical":[192],"algorithms":[193,220],"designed":[195],"actor-critic-scenery":[198],"architecture,":[199],"consisting":[200],"three":[202],"modules:":[203],"1)":[204],"exploration;":[206],"2)":[207],"model":[208],"error":[209],"estimation;":[210],"3)":[212],"network":[213],"update.":[214],"Experimental":[215],"results":[216],"indicate":[217],"our":[219],"achieve":[221,248],"performance":[222],"comparable":[223],"baselines":[225],"while":[226],"zero":[228],"throughout":[231],"entire":[233],"process.":[235],"In":[236],"contrast,":[237],"baseline":[239],"algorithm":[240],"typically":[241],"requires":[242],"thousands":[243],"same":[250],"performance.":[251],"These":[252],"findings":[253],"suggest":[254],"substantial":[256],"potential":[257],"applying":[259],"tasks,":[265],"enabling":[266],"evolution":[269],"intricate":[271],"systems.":[272]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
