{"id":"https://openalex.org/W7123348927","doi":"https://doi.org/10.1109/cdc57313.2025.11312471","title":"Enabling Pareto-Stationarity Exploration in Multi-Objective Reinforcement Learning: A Multi-Objective Weighted-Chebyshev Actor-Critic Approach","display_name":"Enabling Pareto-Stationarity Exploration in Multi-Objective Reinforcement Learning: A Multi-Objective Weighted-Chebyshev Actor-Critic Approach","publication_year":2025,"publication_date":"2025-12-09","ids":{"openalex":"https://openalex.org/W7123348927","doi":"https://doi.org/10.1109/cdc57313.2025.11312471"},"language":null,"primary_location":{"id":"doi:10.1109/cdc57313.2025.11312471","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc57313.2025.11312471","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 64th Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5094255477","display_name":"FNU Hairi","orcid":"https://orcid.org/0000-0001-7457-9893"},"institutions":[{"id":"https://openalex.org/I183533211","display_name":"University of Wisconsin\u2013Whitewater","ror":"https://ror.org/049hrzs50","country_code":"US","type":"education","lineage":["https://openalex.org/I183533211"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Fnu Hairi","raw_affiliation_strings":["University of Wisconsin-Whitewater,Department of Computer Science,Whitewater,WI,USA"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Whitewater,Department of Computer Science,Whitewater,WI,USA","institution_ids":["https://openalex.org/I183533211"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122874114","display_name":"Yang Jiao","orcid":null},"institutions":[{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]},{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yang Jiao","raw_affiliation_strings":["Amazon,Seattle,USA"],"affiliations":[{"raw_affiliation_string":"Amazon,Seattle,USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081311731","display_name":"Tianchen Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianchen Zhou","raw_affiliation_strings":["Amazon,Seattle,USA"],"affiliations":[{"raw_affiliation_string":"Amazon,Seattle,USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013868893","display_name":"Haibo Yang","orcid":"https://orcid.org/0000-0002-5715-0849"},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haibo Yang","raw_affiliation_strings":["Rochester Institute of Technology,Department of Computing and Information Sciences,Rochester,NY,USA"],"affiliations":[{"raw_affiliation_string":"Rochester Institute of Technology,Department of Computing and Information Sciences,Rochester,NY,USA","institution_ids":["https://openalex.org/I155173764"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071245598","display_name":"Chaosheng Dong","orcid":null},"institutions":[{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]},{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chaosheng Dong","raw_affiliation_strings":["Amazon,Seattle,USA"],"affiliations":[{"raw_affiliation_string":"Amazon,Seattle,USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122863227","display_name":"Fan Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]},{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fan Yang","raw_affiliation_strings":["Amazon,Seattle,USA"],"affiliations":[{"raw_affiliation_string":"Amazon,Seattle,USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024142144","display_name":"Michinari Momma","orcid":"https://orcid.org/0009-0005-4140-2350"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michinari Momma","raw_affiliation_strings":["Amazon,Seattle,USA"],"affiliations":[{"raw_affiliation_string":"Amazon,Seattle,USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021319176","display_name":"Yan Gao","orcid":"https://orcid.org/0000-0002-8012-1392"},"institutions":[{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]},{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yan Gao","raw_affiliation_strings":["Amazon,Seattle,USA"],"affiliations":[{"raw_affiliation_string":"Amazon,Seattle,USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5122856328","display_name":"Jia Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jia Liu","raw_affiliation_strings":["The Ohio State University,Department of Electrical and Computer Engineering,Columbus,OH,USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Department of Electrical and Computer Engineering,Columbus,OH,USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5094255477"],"corresponding_institution_ids":["https://openalex.org/I183533211"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.85833191,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5147","last_page":"5152"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7894999980926514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7894999980926514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.12189999967813492,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.01940000057220459,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7639999985694885},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6065000295639038},{"id":"https://openalex.org/keywords/sample-complexity","display_name":"Sample complexity","score":0.541100025177002},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5238999724388123},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.5220000147819519},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.43479999899864197},{"id":"https://openalex.org/keywords/time-complexity","display_name":"Time complexity","score":0.32179999351501465}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7639999985694885},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6215000152587891},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6065000295639038},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.541100025177002},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5238999724388123},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.5220000147819519},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45329999923706055},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.43479999899864197},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4097999930381775},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3382999897003174},{"id":"https://openalex.org/C311688","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Time complexity","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3012999892234802},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2721000015735626},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2615000009536743},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25839999318122864}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cdc57313.2025.11312471","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc57313.2025.11312471","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 64th Conference on Decision and Control (CDC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"many":[1],"multi-objective":[2],"reinforcement":[3],"learning":[4,119],"(MORL)":[5],"applications,":[6],"being":[7],"able":[8],"to":[9,36,71],"systematically":[10,75],"explore":[11],"the":[12,38,43,65,105,121,147],"Pareto-Stationary":[13],"solutions":[14],"under":[15],"multiple":[16],"non-convex":[17],"reward":[18],"objectives":[19],"with":[20,76],"theoretical":[21],"finite-time":[22,77],"sample":[23,78,122],"complexity":[24,79,82,123],"guarantee":[25],"is":[26],"an":[27,88,97],"important":[28,44],"and":[29,41,68],"yet":[30],"under-explored":[31],"problem.":[32],"This":[33],"motivates":[34],"us":[35],"take":[37],"first":[39],"step":[40],"fill":[42],"gap":[45],"in":[46,49,95,114],"MORL.":[47],"Specifically,":[48],"this":[50],"paper,":[51],"we":[52],"propose":[53],"a":[54,109,140],"MultiObjective":[55],"weighted-CHebyshev":[56],"Actor-critic":[57],"(MOCHA)":[58],"algorithm":[59,86,151],"for":[60,124],"MORL,":[61],"which":[62],"judiciously":[63],"integrates":[64],"weighted-Chebychev":[66],"(WC)":[67],"actor-critic":[69],"framework":[70],"enable":[72],"Pareto-Stationarity":[73],"exploration":[74,126],"guarantee.":[80],"Sample":[81],"result":[83],"of":[84,108,149],"MOCHA":[85,150],"reveals":[87],"interesting":[89],"dependency":[90],"on":[91,139],"p<inf":[92,101],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[93,102],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">min</inf>":[94,103],"finding":[96],"\u03f5-Pareto-Stationary":[98],"solution,":[99],"where":[100],"denotes":[104],"minimum":[106],"entry":[107],"given":[110],"weight":[111],"vector":[112],"p":[113],"WC-scalarization.":[115],"By":[116],"carefully":[117],"choosing":[118],"rates,":[120],"each":[125],"can":[127],"be":[128],"$\\tilde":[129],"{\\mathcal{O}}\\left(":[130],"{{\\varepsilon":[131],"^{":[132],"-":[133],"2}}}":[134],"\\right)$.":[135],"Furthermore,":[136],"simulation":[137],"studies":[138],"large":[141],"KuaiRand":[142],"offline":[143],"dataset,":[144],"show":[145],"that":[146],"performance":[148],"significantly":[152],"outperforms":[153],"other":[154],"baseline":[155],"MORL":[156],"approaches.":[157]},"counts_by_year":[],"updated_date":"2026-01-14T00:46:21.520733","created_date":"2026-01-14T00:00:00"}
