{"id":"https://openalex.org/W4402864668","doi":"https://doi.org/10.1561/2400000038","title":"Constrained Reinforcement Learning with Average Reward Objective: Model-Based and Model-Free Algorithms","display_name":"Constrained Reinforcement Learning with Average Reward Objective: Model-Based and Model-Free Algorithms","publication_year":2024,"publication_date":"2024-08-21","ids":{"openalex":"https://openalex.org/W4402864668","doi":"https://doi.org/10.1561/2400000038"},"language":"en","primary_location":{"id":"doi:10.1561/2400000038","is_oa":false,"landing_page_url":"https://doi.org/10.1561/2400000038","pdf_url":null,"source":{"id":"https://openalex.org/S4210187074","display_name":"Foundations and Trends\u00ae in Optimization","issn_l":"2167-3888","issn":["2167-3888","2167-3918"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318575","host_organization_name":"Now Publishers","host_organization_lineage":["https://openalex.org/P4310318575"],"host_organization_lineage_names":["Now Publishers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Foundations and Trends in Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064822688","display_name":"Vaneet Aggarwal","orcid":"https://orcid.org/0000-0001-9131-4723"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Vaneet Aggarwal","raw_affiliation_strings":["Purdue University"],"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007780293","display_name":"Washim Uddin Mondal","orcid":"https://orcid.org/0000-0002-2385-6034"},"institutions":[{"id":"https://openalex.org/I94234084","display_name":"Indian Institute of Technology Kanpur","ror":"https://ror.org/05pjsgx75","country_code":"IN","type":"education","lineage":["https://openalex.org/I94234084"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Washim Uddin Mondal","raw_affiliation_strings":["Indian Institute of Technology Kanpur"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Kanpur","institution_ids":["https://openalex.org/I94234084"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075904309","display_name":"Qinbo Bai","orcid":"https://orcid.org/0000-0003-2933-1180"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qinbo Bai","raw_affiliation_strings":["Purdue University"],"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5064822688"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25045095,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"6","issue":"4","first_page":"193","last_page":"298"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.9419000148773193,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.9419000148773193,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7777582406997681},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5296972393989563},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4702300727367401},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4200412333011627},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3809746205806732},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37203988432884216},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3573806881904602},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.26609551906585693},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24333202838897705},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.09440374374389648}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7777582406997681},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5296972393989563},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4702300727367401},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4200412333011627},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3809746205806732},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37203988432884216},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3573806881904602},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.26609551906585693},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24333202838897705},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.09440374374389648}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1561/2400000038","is_oa":false,"landing_page_url":"https://doi.org/10.1561/2400000038","pdf_url":null,"source":{"id":"https://openalex.org/S4210187074","display_name":"Foundations and Trends\u00ae in Optimization","issn_l":"2167-3888","issn":["2167-3888","2167-3918"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318575","host_organization_name":"Now Publishers","host_organization_lineage":["https://openalex.org/P4310318575"],"host_organization_lineage_names":["Now Publishers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Foundations and Trends in Optimization","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.47999998927116394,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1971459440","https://openalex.org/W2149721706","https://openalex.org/W2160690679","https://openalex.org/W2334782222","https://openalex.org/W2766806202","https://openalex.org/W2795657119","https://openalex.org/W2968301466","https://openalex.org/W3102049471","https://openalex.org/W4206530644","https://openalex.org/W4283805055","https://openalex.org/W4290987548","https://openalex.org/W4319988692","https://openalex.org/W4382239124","https://openalex.org/W4393159969","https://openalex.org/W4396542711"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W2051487156","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"serves":[3],"as":[4,17,121],"a":[5,122,179],"versatile":[6],"framework":[7],"for":[8,66,124,136,164],"sequential":[9],"decision-making,":[10],"finding":[11],"applications":[12,35],"across":[13],"diverse":[14],"domains":[15],"such":[16],"robotics,":[18],"autonomous":[19],"driving,":[20],"recommendation":[21],"systems,":[22],"supply":[23],"chain":[24],"optimization,":[25],"biology,":[26],"mechanics,":[27],"and":[28,63,101,132,175],"finance.":[29],"The":[30,80,127],"primary":[31],"objective":[32],"of":[33,60,72,86,99,138,172,182],"these":[34],"is":[36,119],"to":[37,47,108,151,160,178],"maximize":[38],"the":[39,51,58,70,97,105,113,139,143,148,170],"average":[40,73],"reward.":[41],"Real-world":[42],"scenarios":[43],"often":[44],"necessitate":[45],"adherence":[46],"specific":[48],"constraints":[49],"during":[50],"learning":[52],"process.":[53],"This":[54],"monograph":[55,128,156],"focuses":[56],"on":[57],"exploration":[59],"various":[61],"model-based":[62,87],"model-free":[64,110],"approaches":[65],"Constrained":[67],"RL":[68],"within":[69],"context":[71],"reward":[74],"Markov":[75],"Decision":[76],"Pro":[77],"cesses":[78],"(MDPs).":[79],"investigation":[81],"commences":[82],"with":[83],"an":[84],"examination":[85],"strategies,":[88],"delving":[89],"into":[90],"two":[91],"foundational":[92],"methods":[93],"\u2013":[94],"optimism":[95],"in":[96],"face":[98],"uncertainty":[100],"posterior":[102],"sampling.":[103],"Subsequently,":[104],"discussion":[106,159],"transitions":[107],"parametrized":[109],"approaches,":[111],"where":[112],"primal":[114],"dual":[115],"policy":[116],"gradient-based":[117],"algorithm":[118],"explored":[120],"solution":[123],"constrained":[125],"MDPs.":[126],"provides":[129],"regret":[130],"guarantees":[131],"analyzes":[133],"constraint":[134],"violation":[135],"each":[137],"discussed":[140],"setups.":[141],"For":[142],"above":[144],"exploration,":[145],"we":[146],"assume":[147],"underlying":[149],"MDP":[150],"be":[152],"ergodic.":[153],"Further,":[154],"this":[155],"extends":[157],"its":[158,173],"encompass":[161],"results":[162],"tailored":[163],"weakly":[165],"communicating":[166],"MDPs,":[167],"thereby":[168],"broadening":[169],"scope":[171],"findings":[174],"their":[176],"relevance":[177],"wider":[180],"range":[181],"practical":[183],"scenarios.":[184]},"counts_by_year":[],"updated_date":"2025-12-30T23:08:21.542490","created_date":"2025-10-10T00:00:00"}
