{"id":"https://openalex.org/W7133356927","doi":"https://doi.org/10.48550/arxiv.2603.00043","title":"Reinforcement Learning for Control with Probabilistic Stability Guarantee: A Finite-Sample Approach","display_name":"Reinforcement Learning for Control with Probabilistic Stability Guarantee: A Finite-Sample Approach","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7133356927","doi":"https://doi.org/10.48550/arxiv.2603.00043"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.00043","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.00043","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057269196","display_name":"Minghao Han","orcid":"https://orcid.org/0000-0002-2027-328X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Han, Minghao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127973409","display_name":"Lixian Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Lixian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127998591","display_name":"Chenliang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Chenliang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127995407","display_name":"Zhipeng Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Zhipeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128004073","display_name":"Jun Wang (5906)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127897349","display_name":"Wei Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pan, Wei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5057269196"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.8916000127792358,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.8916000127792358,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.08540000021457672,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10046","display_name":"Stability and Control of Uncertain Systems","score":0.003000000026077032,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7491999864578247},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.7476000189781189},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.739300012588501},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.5629000067710876},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.5027999877929688},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.373199999332428},{"id":"https://openalex.org/keywords/square","display_name":"Square (algebra)","score":0.3671000003814697},{"id":"https://openalex.org/keywords/mean-square","display_name":"Mean square","score":0.35040000081062317}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7491999864578247},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.7476000189781189},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.739300012588501},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.5629000067710876},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.504800021648407},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.5027999877929688},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.41359999775886536},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4050000011920929},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.373199999332428},{"id":"https://openalex.org/C135692309","wikidata":"https://www.wikidata.org/wiki/Q111124","display_name":"Square (algebra)","level":2,"score":0.3671000003814697},{"id":"https://openalex.org/C2988709989","wikidata":"https://www.wikidata.org/wiki/Q85784623","display_name":"Mean square","level":2,"score":0.35040000081062317},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3287000060081482},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C162392398","wikidata":"https://www.wikidata.org/wiki/Q272404","display_name":"Finite set","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C143170015","wikidata":"https://www.wikidata.org/wiki/Q17007850","display_name":"Stability conditions","level":3,"score":0.29120001196861267},{"id":"https://openalex.org/C24404364","wikidata":"https://www.wikidata.org/wiki/Q7246846","display_name":"Probabilistic analysis of algorithms","level":3,"score":0.2815999984741211},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2800999879837036},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C17500928","wikidata":"https://www.wikidata.org/wiki/Q959968","display_name":"Control system","level":2,"score":0.25290000438690186},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2526000142097473}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.00043","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.00043","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5116265416145325,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0,108],"paper":[1],"presents":[2],"a":[3,26,37,65,97,111,126],"novel":[4],"approach":[5],"to":[6,56,85],"reinforcement":[7],"learning":[8,72],"(RL)":[9],"for":[10,69],"control":[11,117],"systems":[12],"that":[13,30,79],"provides":[14],"probabilistic":[15,27],"stability":[16,28,34,46,120],"guarantees":[17],"using":[18,35],"finite":[19,38,130],"data.":[20,131],"Leveraging":[21],"Lyapunov's":[22],"method,":[23],"we":[24,63],"propose":[25],"theorem":[29,68],"ensures":[31],"mean":[32],"square":[33],"only":[36],"number":[39,50],"of":[40,45,53,90],"sampled":[41],"trajectories.":[42],"The":[43,88],"probability":[44],"increases":[47],"with":[48,129],"the":[49,81,103],"and":[51,73,116,122],"length":[52],"trajectories,":[54],"converging":[55],"certainty":[57],"as":[58],"data":[59],"size":[60],"grows.":[61],"Additionally,":[62],"derive":[64],"policy":[66,71],"gradient":[67],"stabilizing":[70],"develop":[74],"an":[75],"RL":[76,115],"algorithm,":[77],"L-REINFORCE,":[78],"extends":[80],"classical":[82],"REINFORCE":[83],"algorithm":[84],"stabilization":[86],"problems.":[87],"effectiveness":[89],"L-REINFORCE":[91],"is":[92],"demonstrated":[93],"through":[94],"simulations":[95],"on":[96],"Cartpole":[98],"task,":[99],"where":[100],"it":[101],"outperforms":[102],"baseline":[104],"in":[105,125],"ensuring":[106],"stability.":[107],"work":[109],"bridges":[110],"critical":[112],"gap":[113],"between":[114],"theory,":[118],"enabling":[119],"analysis":[121],"controller":[123],"design":[124],"model-free":[127],"framework":[128]},"counts_by_year":[],"updated_date":"2026-03-04T07:09:34.246503","created_date":"2026-03-04T00:00:00"}
