{"id":"https://openalex.org/W4312445281","doi":"https://doi.org/10.1109/access.2022.3228922","title":"Safe Reinforcement Learning Using Wasserstein Distributionally Robust MPC and Chance Constraint","display_name":"Safe Reinforcement Learning Using Wasserstein Distributionally Robust MPC and Chance Constraint","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4312445281","doi":"https://doi.org/10.1109/access.2022.3228922"},"language":"en","primary_location":{"id":"doi:10.1109/access.2022.3228922","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3228922","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09982609.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09982609.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033874726","display_name":"Arash Bahari Kordabad","orcid":"https://orcid.org/0000-0001-8931-5372"},"institutions":[{"id":"https://openalex.org/I204778367","display_name":"Norwegian University of Science and Technology","ror":"https://ror.org/05xg72x27","country_code":"NO","type":"education","lineage":["https://openalex.org/I204778367"]}],"countries":["NO"],"is_corresponding":true,"raw_author_name":"Arash Bahari Kordabad","raw_affiliation_strings":["Department of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway"],"affiliations":[{"raw_affiliation_string":"Department of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway","institution_ids":["https://openalex.org/I204778367"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040098270","display_name":"Rafa\u0142 Wi\u015bniewski","orcid":"https://orcid.org/0000-0001-6719-8427"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Rafael Wisniewski","raw_affiliation_strings":["Department of Electronical Systems, Aalborg University, Aalborg, Denmark"],"affiliations":[{"raw_affiliation_string":"Department of Electronical Systems, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049645185","display_name":"S\u00e9bastien Gros","orcid":"https://orcid.org/0000-0001-6054-2133"},"institutions":[{"id":"https://openalex.org/I204778367","display_name":"Norwegian University of Science and Technology","ror":"https://ror.org/05xg72x27","country_code":"NO","type":"education","lineage":["https://openalex.org/I204778367"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Sebastien Gros","raw_affiliation_strings":["Department of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway"],"affiliations":[{"raw_affiliation_string":"Department of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway","institution_ids":["https://openalex.org/I204778367"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5033874726"],"corresponding_institution_ids":["https://openalex.org/I204778367"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.5799,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.82633983,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"10","issue":null,"first_page":"130058","last_page":"130067"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11413","display_name":"Risk and Portfolio Optimization","score":0.9805999994277954,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9685999751091003,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6393449306488037},{"id":"https://openalex.org/keywords/wasserstein-metric","display_name":"Wasserstein metric","score":0.6105031371116638},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6075080633163452},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5498190522193909},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5105953812599182},{"id":"https://openalex.org/keywords/model-predictive-control","display_name":"Model predictive control","score":0.4768487215042114},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.4494025409221649},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.4469635486602783},{"id":"https://openalex.org/keywords/motion-planning","display_name":"Motion planning","score":0.42945390939712524},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.42848584055900574},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.37561142444610596},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2882925271987915},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.27987152338027954},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.19464361667633057},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.16041073203086853},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11729338765144348}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6393449306488037},{"id":"https://openalex.org/C2777634741","wikidata":"https://www.wikidata.org/wiki/Q768993","display_name":"Wasserstein metric","level":2,"score":0.6105031371116638},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6075080633163452},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5498190522193909},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5105953812599182},{"id":"https://openalex.org/C172205157","wikidata":"https://www.wikidata.org/wiki/Q1782962","display_name":"Model predictive control","level":3,"score":0.4768487215042114},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.4494025409221649},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.4469635486602783},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.42945390939712524},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.42848584055900574},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.37561142444610596},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2882925271987915},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27987152338027954},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.19464361667633057},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.16041073203086853},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11729338765144348},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/access.2022.3228922","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3228922","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09982609.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:openaire/b8e235ef-bb96-42f9-9748-0e3ebc226bb8","is_oa":true,"landing_page_url":"https://vbn.aau.dk/da/publications/b8e235ef-bb96-42f9-9748-0e3ebc226bb8","pdf_url":null,"source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Kordabad, A B, Wisniewski, R & Gros, S 2022, 'Safe Reinforcement Learning Using Wasserstein Distributionally Robust MPC and Chance Constraint', IEEE Access, vol. 10, 9982609, pp. 130058-130067. https://doi.org/10.1109/ACCESS.2022.3228922","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:doaj.org/article:eb29e505702046b4bd011390ba36c0af","is_oa":true,"landing_page_url":"https://doaj.org/article/eb29e505702046b4bd011390ba36c0af","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 10, Pp 130058-130067 (2022)","raw_type":"article"},{"id":"pmh:oai:ntnuopen.ntnu.no:11250/3045969","is_oa":true,"landing_page_url":"https://hdl.handle.net/11250/3045969","pdf_url":null,"source":{"id":"https://openalex.org/S4306401716","display_name":"Duo Research Archive (University of Oslo)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I184942183","host_organization_name":"University of Oslo","host_organization_lineage":["https://openalex.org/I184942183"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access","raw_type":"info:eu-repo/semantics/other"}],"best_oa_location":{"id":"doi:10.1109/access.2022.3228922","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3228922","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09982609.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7099999785423279,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G2096367874","display_name":null,"funder_award_id":"Project","funder_id":"https://openalex.org/F4320323299","funder_display_name":"Norges Forskningsr\u00e5d"},{"id":"https://openalex.org/G3748656914","display_name":null,"funder_award_id":"Norway","funder_id":"https://openalex.org/F4320323299","funder_display_name":"Norges Forskningsr\u00e5d"}],"funders":[{"id":"https://openalex.org/F4320323299","display_name":"Norges Forskningsr\u00e5d","ror":"https://ror.org/00epmv149"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4312445281.pdf","grobid_xml":"https://content.openalex.org/works/W4312445281.grobid-xml"},"referenced_works_count":52,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W580164506","https://openalex.org/W1647779468","https://openalex.org/W1968355947","https://openalex.org/W1983916623","https://openalex.org/W2003514920","https://openalex.org/W2006041100","https://openalex.org/W2166292404","https://openalex.org/W2570467197","https://openalex.org/W2912747791","https://openalex.org/W2930426397","https://openalex.org/W2948652605","https://openalex.org/W2949207039","https://openalex.org/W2950882283","https://openalex.org/W2963134136","https://openalex.org/W2963450292","https://openalex.org/W2966735560","https://openalex.org/W2968104655","https://openalex.org/W2997987223","https://openalex.org/W3040274799","https://openalex.org/W3045961302","https://openalex.org/W3048735518","https://openalex.org/W3119655085","https://openalex.org/W3120459386","https://openalex.org/W3124407081","https://openalex.org/W3132905665","https://openalex.org/W3138294267","https://openalex.org/W3157375408","https://openalex.org/W3167122261","https://openalex.org/W3185528958","https://openalex.org/W3186992617","https://openalex.org/W3204061874","https://openalex.org/W3212144282","https://openalex.org/W4211221179","https://openalex.org/W4214717370","https://openalex.org/W4250589301","https://openalex.org/W4252698487","https://openalex.org/W4285379000","https://openalex.org/W4285446220","https://openalex.org/W4285446252","https://openalex.org/W4294562617","https://openalex.org/W4294597284","https://openalex.org/W4296849093","https://openalex.org/W4306353376","https://openalex.org/W4324134829","https://openalex.org/W6617021176","https://openalex.org/W6683517036","https://openalex.org/W6703180097","https://openalex.org/W6750027077","https://openalex.org/W6751535212","https://openalex.org/W6794620985","https://openalex.org/W6850991573"],"related_works":["https://openalex.org/W2051058708","https://openalex.org/W1494268238","https://openalex.org/W154868527","https://openalex.org/W1983207144","https://openalex.org/W2490706771","https://openalex.org/W2480116122","https://openalex.org/W4255576661","https://openalex.org/W1516574938","https://openalex.org/W2625725254","https://openalex.org/W2563912921"],"abstract_inverted_index":{"In":[0,46],"this":[1],"paper,":[2],"we":[3,55],"address":[4],"the":[5,13,39,74,88,95,99,103,114,124,127,134,137,141,159,162],"chance-constrained":[6],"safe":[7,51],"Reinforcement":[8],"Learning":[9],"(RL)":[10],"problem":[11],"using":[12,58],"function":[14,71],"approximators":[15],"based":[16],"on":[17],"Stochastic":[18],"Model":[19,26],"Predictive":[20,27],"Control":[21,28],"(SMPC)":[22],"and":[23,44,119],"Distributionally":[24],"Robust":[25],"(DRMPC).":[29],"We":[30,82],"use":[31,83],"Conditional":[32],"Value":[33],"at":[34,62],"Risk":[35],"(CVaR)":[36],"to":[37,48,73,132,139,157],"measure":[38],"probability":[40],"of":[41,113,116,161],"constraint":[42,76],"violation":[43,77],"safety.":[45],"order":[47],"provide":[49],"a":[50,68,84,110],"policy":[52],"by":[53,94],"construction,":[54],"first":[56],"propose":[57],"parameterized":[59],"nonlinear":[60],"DRMPC":[61,66,108,138],"each":[63],"time":[64],"step.":[65],"optimizes":[67],"finite-horizon":[69],"cost":[70],"subject":[72],"worst-case":[75],"in":[78,136],"an":[79],"ambiguity":[80,100],"set.":[81,101],"statistical":[85],"ball":[86],"around":[87],"empirical":[89],"distribution":[90],"with":[91,151],"radius":[92],"measured":[93],"Wasserstein":[96],"metric":[97],"as":[98],"Unlike":[102],"sample":[104,117],"average":[105],"approximation":[106],"SMPC,":[107],"provides":[109],"probabilistic":[111],"guarantee":[112],"out":[115],"risk":[118],"requires":[120],"lower":[121],"samples":[122],"from":[123],"disturbance.":[125],"Then":[126],"Q-learning":[128],"method":[129],"is":[130],"used":[131],"optimize":[133],"parameters":[135],"achieve":[140],"best":[142],"closed-loop":[143],"performance.":[144],"Wheeled":[145],"Mobile":[146],"Robot":[147],"(WMR)":[148],"path":[149],"planning":[150],"obstacle":[152],"avoidance":[153],"will":[154],"be":[155],"considered":[156],"illustrate":[158],"efficiency":[160],"proposed":[163],"method.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
