{"id":"https://openalex.org/W4368408208","doi":"https://doi.org/10.1145/3576841.3585919","title":"Joint Differentiable Optimization and Verification for Certified Reinforcement Learning","display_name":"Joint Differentiable Optimization and Verification for Certified Reinforcement Learning","publication_year":2023,"publication_date":"2023-05-04","ids":{"openalex":"https://openalex.org/W4368408208","doi":"https://doi.org/10.1145/3576841.3585919"},"language":"en","primary_location":{"id":"doi:10.1145/3576841.3585919","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3576841.3585919","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM/IEEE 14th International Conference on Cyber-Physical Systems (with CPS-IoT Week 2023)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100408475","display_name":"Yixuan Wang","orcid":"https://orcid.org/0000-0003-0847-8570"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yixuan Wang","raw_affiliation_strings":["Northwestern University, Evanston, IL, USA"],"affiliations":[{"raw_affiliation_string":"Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052356596","display_name":"Sinong Zhan","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Simon Zhan","raw_affiliation_strings":["UC Berkeley, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"UC Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101595179","display_name":"Zhilu Wang","orcid":"https://orcid.org/0000-0002-6645-262X"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhilu Wang","raw_affiliation_strings":["Northwestern University, Evanston, IL, USA","Northwestern University, Evanston, IL, United States of America"],"affiliations":[{"raw_affiliation_string":"Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]},{"raw_affiliation_string":"Northwestern University, Evanston, IL, United States of America","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009913582","display_name":"Chao Huang","orcid":"https://orcid.org/0000-0002-9300-1787"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chao Huang","raw_affiliation_strings":["University of Liverpool, Liverpool, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Liverpool, Liverpool, United Kingdom","institution_ids":["https://openalex.org/I146655781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101934110","display_name":"Zhaoran Wang","orcid":"https://orcid.org/0000-0002-1824-2580"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhaoran Wang","raw_affiliation_strings":["Northwestern University, Evanston, IL, USA","Northwestern University, Evanston, IL, United States of America"],"affiliations":[{"raw_affiliation_string":"Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]},{"raw_affiliation_string":"Northwestern University, Evanston, IL, United States of America","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727948","display_name":"Zhuoran Yang","orcid":"https://orcid.org/0000-0001-5269-9958"},"institutions":[{"id":"https://openalex.org/I32971472","display_name":"Yale University","ror":"https://ror.org/03v76x132","country_code":"US","type":"education","lineage":["https://openalex.org/I32971472"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhuoran Yang","raw_affiliation_strings":["Yale University, New Haven, CT, USA"],"affiliations":[{"raw_affiliation_string":"Yale University, New Haven, CT, USA","institution_ids":["https://openalex.org/I32971472"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020896290","display_name":"Qi Zhu","orcid":"https://orcid.org/0000-0002-7700-4099"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qi Zhu","raw_affiliation_strings":["Northwestern University, Evanston, IL, United States of America"],"affiliations":[{"raw_affiliation_string":"Northwestern University, Evanston, IL, United States of America","institution_ids":["https://openalex.org/I111979921"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100408475"],"corresponding_institution_ids":["https://openalex.org/I111979921"],"apc_list":null,"apc_paid":null,"fwci":3.1695,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.93006364,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"132","last_page":"141"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13295","display_name":"Safety Systems Engineering in Autonomy","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8240028619766235},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.75567626953125},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6643895506858826},{"id":"https://openalex.org/keywords/differentiable-function","display_name":"Differentiable function","score":0.5367077589035034},{"id":"https://openalex.org/keywords/lyapunov-function","display_name":"Lyapunov function","score":0.53667813539505},{"id":"https://openalex.org/keywords/certificate","display_name":"Certificate","score":0.5021102428436279},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5008947849273682},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.4501514136791229},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4210212528705597},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.34886741638183594},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30642569065093994},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.21956130862236023},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10757088661193848}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8240028619766235},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.75567626953125},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6643895506858826},{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.5367077589035034},{"id":"https://openalex.org/C60640748","wikidata":"https://www.wikidata.org/wiki/Q2337858","display_name":"Lyapunov function","level":3,"score":0.53667813539505},{"id":"https://openalex.org/C96865113","wikidata":"https://www.wikidata.org/wiki/Q2946816","display_name":"Certificate","level":2,"score":0.5021102428436279},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5008947849273682},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.4501514136791229},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4210212528705597},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.34886741638183594},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30642569065093994},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.21956130862236023},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10757088661193848},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3576841.3585919","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3576841.3585919","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM/IEEE 14th International Conference on Cyber-Physical Systems (with CPS-IoT Week 2023)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7099999785423279,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G1677250192","display_name":null,"funder_award_id":"2038853","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4769151788","display_name":null,"funder_award_id":"1724341","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7908841254","display_name":null,"funder_award_id":"N00014-19-1-2496","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G8136969180","display_name":null,"funder_award_id":"1834701","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W14096389","https://openalex.org/W1496537269","https://openalex.org/W1607353007","https://openalex.org/W1960944592","https://openalex.org/W1966397236","https://openalex.org/W1986385081","https://openalex.org/W2037009101","https://openalex.org/W2052446317","https://openalex.org/W2054649160","https://openalex.org/W2079089353","https://openalex.org/W2083449425","https://openalex.org/W2106850246","https://openalex.org/W2127159302","https://openalex.org/W2625874945","https://openalex.org/W2890856788","https://openalex.org/W2980176926","https://openalex.org/W2981603589","https://openalex.org/W3021850381","https://openalex.org/W3033405912","https://openalex.org/W3047732556","https://openalex.org/W3048265338","https://openalex.org/W3049327451","https://openalex.org/W3114209678","https://openalex.org/W3126883574","https://openalex.org/W3150718622","https://openalex.org/W3199810006","https://openalex.org/W3211383651","https://openalex.org/W3211752748","https://openalex.org/W3214899300","https://openalex.org/W4214717370","https://openalex.org/W4226362578","https://openalex.org/W4254717764","https://openalex.org/W4287816939","https://openalex.org/W4293024065","https://openalex.org/W4302205997","https://openalex.org/W4312660377","https://openalex.org/W4318685743","https://openalex.org/W6750181461","https://openalex.org/W6767977373","https://openalex.org/W6797246547"],"related_works":["https://openalex.org/W2152670157","https://openalex.org/W2386410636","https://openalex.org/W176737593","https://openalex.org/W2808418668","https://openalex.org/W2903299703","https://openalex.org/W2156021013","https://openalex.org/W2016648086","https://openalex.org/W3105579180","https://openalex.org/W4399157305","https://openalex.org/W4281791088"],"abstract_inverted_index":{"Model-based":[0],"reinforcement":[1,76],"learning":[2,62,77],"has":[3,46],"been":[4,47],"widely":[5],"studied":[6],"for":[7,16,135],"controller":[8,45],"synthesis":[9],"in":[10,146],"cyber-physical":[11],"systems":[12],"(CPSs).":[13],"In":[14,110],"particular,":[15],"safety-critical":[17],"CPSs,":[18],"it":[19,49],"is":[20,50,91,114],"important":[21],"to":[22,53,128],"formally":[23,159],"certify":[24],"system":[25,161],"properties":[26],"(e.g.,":[27],"safety,":[28],"stability)":[29],"under":[30],"the":[31,44,95,98,140],"learned":[32],"RL":[33],"controller.":[34],"However,":[35],"as":[36],"existing":[37],"methods":[38],"typically":[39],"conduct":[40],"formal":[41,79],"verification":[42,80],"after":[43,58],"learned,":[48],"often":[51],"difficult":[52],"obtain":[54],"any":[55],"certificate,":[56],"even":[57],"many":[59],"iterations":[60],"between":[61],"and":[63,78,83,101,107,125,155,163],"verification.":[64],"To":[65],"address":[66],"this":[67],"challenge,":[68],"we":[69],"propose":[70],"a":[71,85,117],"framework":[72,113,145],"that":[73,158],"jointly":[74],"conducts":[75],"by":[81,94,104],"formulating":[82],"solving":[84],"novel":[86],"bilevel":[87],"optimization":[88],"problem,":[89],"which":[90],"end-to-end":[92],"differentiable":[93],"gradients":[96],"from":[97],"value":[99,121],"function":[100],"certificates":[102],"formulated":[103],"linear":[105],"programs":[106],"semi-definite":[108],"programs.":[109],"experiments,":[111],"our":[112,144],"compared":[115],"with":[116,150],"baseline":[118],"model-based":[119],"stochastic":[120],"gradient":[122],"(SVG)":[123],"method":[124],"its":[126],"extension":[127],"solve":[129],"constrained":[130],"Markov":[131],"Decision":[132],"Processes":[133],"(CMDPs)":[134],"safety.":[136],"The":[137],"results":[138],"demonstrate":[139],"significant":[141],"advantages":[142],"of":[143],"finding":[147],"feasible":[148],"controllers":[149],"certificates,":[151],"i.e.,":[152],"Barrier":[153],"functions":[154,157],"Lyapunov":[156],"ensure":[160],"safety":[162],"stability,":[164],"available":[165],"on":[166],"Github.":[167]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
