{"id":"https://openalex.org/W4226275970","doi":"https://doi.org/10.1109/robio54168.2021.9739232","title":"Shiftable Dynamic Policy Programming for Efficient and Robust Reinforcement Learning Control","display_name":"Shiftable Dynamic Policy Programming for Efficient and Robust Reinforcement Learning Control","publication_year":2021,"publication_date":"2021-12-27","ids":{"openalex":"https://openalex.org/W4226275970","doi":"https://doi.org/10.1109/robio54168.2021.9739232"},"language":"en","primary_location":{"id":"doi:10.1109/robio54168.2021.9739232","is_oa":false,"landing_page_url":"https://doi.org/10.1109/robio54168.2021.9739232","pdf_url":null,"source":{"id":"https://openalex.org/S4363607846","display_name":"2021 IEEE International Conference on Robotics and Biomimetics (ROBIO)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Biomimetics (ROBIO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017413794","display_name":"Zhiwei Shang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhiwei Shang","raw_affiliation_strings":["University of Chinese Academy of Sciences,China","University of Chinese Academy of Sciences, China","Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems, Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems, Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100614997","display_name":"Huiyun Li","orcid":"https://orcid.org/0000-0003-0157-1393"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huiyun Li","raw_affiliation_strings":["Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences,Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems,Shenzhen,China","Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems, Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences,Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems,Shenzhen,China","institution_ids":["https://openalex.org/I4210145761"]},{"raw_affiliation_string":"Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems, Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011048472","display_name":"Yunduan Cui","orcid":"https://orcid.org/0000-0001-5539-4260"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunduan Cui","raw_affiliation_strings":["Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences,Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems,Shenzhen,China","Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems, Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences,Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems,Shenzhen,China","institution_ids":["https://openalex.org/I4210145761"]},{"raw_affiliation_string":"Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems, Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5017413794"],"corresponding_institution_ids":["https://openalex.org/I4210145761","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.4363,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.57238273,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1688","last_page":"1693"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.8651999831199646,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.8651999831199646,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.8352000117301941,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8169254064559937},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6842133402824402},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.6724774837493896},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.43949243426322937},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39394432306289673},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11006110906600952}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8169254064559937},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6842133402824402},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.6724774837493896},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.43949243426322937},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39394432306289673},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11006110906600952}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/robio54168.2021.9739232","is_oa":false,"landing_page_url":"https://doi.org/10.1109/robio54168.2021.9739232","pdf_url":null,"source":{"id":"https://openalex.org/S4363607846","display_name":"2021 IEEE International Conference on Robotics and Biomimetics (ROBIO)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Biomimetics (ROBIO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7099999785423279,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4306904969","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2138720691","https://openalex.org/W2376932109","https://openalex.org/W4362501864","https://openalex.org/W2001405890"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"a":[3,52,57,74,107],"novel":[4],"value":[5],"function-based":[6],"reinforcement":[7],"learning":[8,70],"(RL)":[9],"approach,":[10],"Shiftable":[11],"Dynamic":[12,36],"Policy":[13,37],"Programming":[14],"(SDPP),":[15],"is":[16],"proposed":[17],"to":[18,60,67],"improve":[19],"the":[20,41,46,63,68,79,124],"sample":[21],"efficiency":[22],"and":[23,48,72,113,119],"robustness":[24,121],"of":[25,110],"RL":[26,34],"in":[27,88,104],"control":[28,62,86],"problems.":[29],"Extended":[30],"from":[31],"previous":[32,49],"sample-efficient":[33],"method":[35],"Programming(DPP)":[38],"that":[39],"punishes":[40],"over-large":[42],"Kullback-Leibler":[43],"divergency":[44],"between":[45],"updated":[47],"policies":[50],"as":[51],"penalty":[53,64],"term,":[54],"SDPP":[55,99],"employs":[56],"shiftable":[58,80],"parameter":[59],"dynamically":[61],"term":[65],"according":[66],"historical":[69],"performances":[71],"designs":[73],"general":[75],"shift":[76],"strategy":[77],"for":[78],"parameter.":[81],"Evaluated":[82],"by":[83],"several":[84],"benchmark":[85],"tasks":[87],"OpenAI":[89],"gym,":[90],"based":[91],"on":[92],"agent\u2019s":[93],"behaviors":[94],"facing":[95],"various":[96],"reward":[97],"settings,":[98],"successfully":[100],"demonstrates":[101],"its":[102],"capability":[103],"automatically":[105],"selecting":[106],"suitable":[108],"smoothness":[109],"policy":[111],"update":[112],"therefore":[114],"achieves":[115],"both":[116],"faster":[117],"convergence":[118],"better":[120],"compared":[122],"with":[123],"original":[125],"DPP.":[126]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}