{"id":"https://openalex.org/W4382935770","doi":"https://doi.org/10.23919/acc55779.2023.10156372","title":"Continuous-Time Policy Optimization","display_name":"Continuous-Time Policy Optimization","publication_year":2023,"publication_date":"2023-05-31","ids":{"openalex":"https://openalex.org/W4382935770","doi":"https://doi.org/10.23919/acc55779.2023.10156372"},"language":"en","primary_location":{"id":"doi:10.23919/acc55779.2023.10156372","is_oa":false,"landing_page_url":"https://doi.org/10.23919/acc55779.2023.10156372","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 American Control Conference (ACC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029301375","display_name":"Guojian Zhan","orcid":"https://orcid.org/0000-0002-1246-4860"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guojian Zhan","raw_affiliation_strings":["Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057254330","display_name":"Yuxuan Jiang","orcid":"https://orcid.org/0000-0003-4285-0495"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxuan Jiang","raw_affiliation_strings":["Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067909017","display_name":"Jingliang Duan","orcid":"https://orcid.org/0000-0002-3697-1576"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingliang Duan","raw_affiliation_strings":["University of Science and Technology Beijing,School of Mechanical Engineering,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology Beijing,School of Mechanical Engineering,Beijing,China,100084","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100747108","display_name":"Shengbo Eben Li","orcid":"https://orcid.org/0000-0003-4923-3633"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengbo Eben Li","raw_affiliation_strings":["Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100640936","display_name":"Bo Cheng","orcid":"https://orcid.org/0000-0002-1753-2922"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Cheng","raw_affiliation_strings":["Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102533063","display_name":"Keqiang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Keqiang Li","raw_affiliation_strings":["Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5029301375"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.8741,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.78284577,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3382","last_page":"3388"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5939828157424927}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5939828157424927}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/acc55779.2023.10156372","is_oa":false,"landing_page_url":"https://doi.org/10.23919/acc55779.2023.10156372","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 American Control Conference (ACC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5600000023841858,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W364367221","https://openalex.org/W1587799944","https://openalex.org/W2400458653","https://openalex.org/W2901175338","https://openalex.org/W2963755523","https://openalex.org/W2965543245","https://openalex.org/W3015082424","https://openalex.org/W3096414164","https://openalex.org/W3167264379","https://openalex.org/W3187048847","https://openalex.org/W3209549283","https://openalex.org/W4206582666","https://openalex.org/W4285071855","https://openalex.org/W4287554209","https://openalex.org/W4362650413","https://openalex.org/W6752307458","https://openalex.org/W6779570212","https://openalex.org/W6785651428","https://openalex.org/W6787292459"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Discretized":[0],"dynamics":[1],"is":[2,14,27,72],"widespread":[3],"in":[4,106],"numerical":[5],"optimization":[6,50,64],"and":[7,112,125,143],"optimal":[8,37,43],"control.":[9],"However,":[10],"the":[11,18,23,40,54,59,75,80,88,98,107,136],"physical":[12],"system":[13,141],"inherently":[15],"continuous":[16],"at":[17],"macroscopic":[19],"scale,":[20],"thus":[21],"handling":[22],"original":[24],"continuous-time":[25,41,48,140],"problem":[26],"desirable.":[28],"In":[29],"this":[30],"paper,":[31],"we":[32,116],"focus":[33],"on":[34,119],"learning":[35],"an":[36],"policy":[38,49,60,138],"under":[39],"finite-horizon":[42],"control":[44],"setting.":[45],"We":[46,94],"introduce":[47],"(CTPO),":[51],"which":[52,86],"employs":[53],"adjoint":[55],"method":[56],"to":[57,73,83,101],"calculate":[58],"gradient,":[61],"then":[62],"implements":[63],"by":[65],"gradient":[66],"descent.":[67],"The":[68,132],"nature":[69],"of":[70,77,90,110],"CTPO":[71],"minimize":[74],"integral":[76],"Hamiltonian":[78],"over":[79],"time":[81],"horizon":[82],"approach":[84],"optimality,":[85],"fits":[87],"framework":[89],"Pontryagin\u2019s":[91],"minimum":[92],"principle.":[93],"further":[95],"reveal":[96],"that":[97,135],"intrinsic":[99],"connection":[100],"its":[102],"discrete-time":[103],"counterpart":[104],"lies":[105],"different":[108],"order":[109],"differentiation":[111],"discretization":[113],"operations.":[114],"Finally":[115],"conduct":[117],"experiments":[118],"a":[120,126],"linear":[121],"quadratic":[122],"regulator":[123],"(LQR)":[124],"nonlinear":[127],"vehicle":[128],"trajectory":[129],"tracking":[130],"task.":[131],"results":[133],"demonstrate":[134],"trained":[137],"retains":[139],"information":[142],"achieves":[144],"high":[145],"accuracy.":[146]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
