{"id":"https://openalex.org/W4391020310","doi":"https://doi.org/10.1109/cdc49753.2023.10383560","title":"Physics-Model-Regulated Deep Reinforcement Learning Towards Safety &amp; Stability Guarantees","display_name":"Physics-Model-Regulated Deep Reinforcement Learning Towards Safety &amp; Stability Guarantees","publication_year":2023,"publication_date":"2023-12-13","ids":{"openalex":"https://openalex.org/W4391020310","doi":"https://doi.org/10.1109/cdc49753.2023.10383560"},"language":"en","primary_location":{"id":"doi:10.1109/cdc49753.2023.10383560","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/cdc49753.2023.10383560","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 62nd IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030370370","display_name":"Hongpeng Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Hongpeng Cao","raw_affiliation_strings":["Hongpeng Cao is with School of Engineering and Design, Technical University of Munich,Munich,Germany,85748"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hongpeng Cao is with School of Engineering and Design, Technical University of Munich,Munich,Germany,85748","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056011834","display_name":"Yanbing Mao","orcid":"https://orcid.org/0000-0002-7233-4179"},"institutions":[{"id":"https://openalex.org/I185443292","display_name":"Wayne State University","ror":"https://ror.org/01070mq45","country_code":"US","type":"education","lineage":["https://openalex.org/I185443292"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanbing Mao","raw_affiliation_strings":["Wayne State University,Yanbing Mao is with Engineering Technology Division,Detroit,MI,USA,48201"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wayne State University,Yanbing Mao is with Engineering Technology Division,Detroit,MI,USA,48201","institution_ids":["https://openalex.org/I185443292"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067032971","display_name":"Lui Sha","orcid":"https://orcid.org/0000-0002-5578-0791"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lui Sha","raw_affiliation_strings":["University of Illinois at Urbana-Champaign,Department of Computer Science,Urbana,IL,USA,61801"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign,Department of Computer Science,Urbana,IL,USA,61801","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060442004","display_name":"Marco Caccamo","orcid":"https://orcid.org/0000-0003-2328-044X"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Marco Caccamo","raw_affiliation_strings":["School of Engineering and Design and Institute of Robotics and Machine Intelligence, Technical University of Munich,Munich,Germany","School of Engineering and Design and Institute of Robotics and Machine Intelligence, Technical University of Munich, Munich, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Engineering and Design and Institute of Robotics and Machine Intelligence, Technical University of Munich,Munich,Germany","institution_ids":["https://openalex.org/I62916508"]},{"raw_affiliation_string":"School of Engineering and Design and Institute of Robotics and Machine Intelligence, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5030370370"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":0.1685,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.59874231,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"17","issue":null,"first_page":"8306","last_page":"8311"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9789000153541565,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10409","display_name":"Fuel Cells and Related Materials","score":0.9715999960899353,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8501805067062378},{"id":"https://openalex.org/keywords/phy","display_name":"PHY","score":0.8003252744674683},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5681968331336975},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4741979241371155},{"id":"https://openalex.org/keywords/inverted-pendulum","display_name":"Inverted pendulum","score":0.4621264636516571},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3915313482284546},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2522570490837097},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.15221551060676575},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.07103168964385986}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8501805067062378},{"id":"https://openalex.org/C41918916","wikidata":"https://www.wikidata.org/wiki/Q192727","display_name":"PHY","level":4,"score":0.8003252744674683},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5681968331336975},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4741979241371155},{"id":"https://openalex.org/C192921069","wikidata":"https://www.wikidata.org/wiki/Q550134","display_name":"Inverted pendulum","level":3,"score":0.4621264636516571},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3915313482284546},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2522570490837097},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.15221551060676575},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.07103168964385986},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C19247436","wikidata":"https://www.wikidata.org/wiki/Q192727","display_name":"Physical layer","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cdc49753.2023.10383560","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/cdc49753.2023.10383560","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 62nd IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4626356820","display_name":null,"funder_award_id":"CPS-2311084","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W257772133","https://openalex.org/W2155007355","https://openalex.org/W2164479831","https://openalex.org/W2594877703","https://openalex.org/W2747329762","https://openalex.org/W2913668833","https://openalex.org/W2963525569","https://openalex.org/W2966735560","https://openalex.org/W2967727187","https://openalex.org/W3175254947","https://openalex.org/W3186772940","https://openalex.org/W3207110310","https://openalex.org/W3210839039","https://openalex.org/W4292103799","https://openalex.org/W4294555834","https://openalex.org/W4296367603","https://openalex.org/W4312639337","https://openalex.org/W4378713564","https://openalex.org/W6682849425","https://openalex.org/W6684037837","https://openalex.org/W6684921986","https://openalex.org/W6733049761","https://openalex.org/W6738483526","https://openalex.org/W6741002519","https://openalex.org/W6747473740","https://openalex.org/W6762580838","https://openalex.org/W6779812412","https://openalex.org/W6838254230","https://openalex.org/W6841950565","https://openalex.org/W6853620785"],"related_works":["https://openalex.org/W4283023968","https://openalex.org/W1964667553","https://openalex.org/W2001476941","https://openalex.org/W962423920","https://openalex.org/W2390471376","https://openalex.org/W2530058746","https://openalex.org/W1487710470","https://openalex.org/W2387968248","https://openalex.org/W2120821724","https://openalex.org/W1939593940"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,49],"learning":[2,50],"(DRL)":[3],"has":[4],"demonstrated":[5],"impressive":[6],"success":[7],"in":[8,56],"solving":[9],"complex":[10],"control":[11,15,69],"tasks":[12],"by":[13,94],"synthesizing":[14],"policies":[16],"from":[17],"data.":[18],"However,":[19],"the":[20,39,43,77,79,87,90,100,105],"safety":[21,82],"and":[22,34,63,70,83,111],"stability":[23,84],"of":[24,89],"applying":[25],"DRL":[26],"to":[27],"safety-critical":[28],"systems":[29],"remain":[30],"a":[31,45,60],"primary":[32],"concern":[33],"challenging":[35],"problem.":[36],"To":[37],"address":[38],"problem,":[40],"we":[41],"propose":[42],"Phy-DRL:":[44],"novel":[46,55],"physics-model-regulated":[47,61],"deep":[48],"framework.":[51],"The":[52,73],"Phy-DRL":[53,78,91,106],"is":[54,92],"two":[57],"architectural":[58],"designs:":[59],"reward":[62],"residual":[64],"control,":[65],"which":[66],"integrates":[67],"physics-model-based":[68],"data-driven":[71],"control.":[72],"concurrent":[74],"designs":[75],"enable":[76],"mathematically":[80],"provable":[81],"guarantees.":[85],"Finally,":[86],"effectiveness":[88],"validated":[93],"an":[95],"inverted":[96],"pendulum":[97],"system.":[98],"Additionally,":[99],"experimental":[101],"results":[102],"demonstrate":[103],"that":[104],"features":[107],"remarkably":[108],"accelerated":[109],"training":[110],"enlarged":[112],"reward.":[113]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
