{"id":"https://openalex.org/W4392931579","doi":"https://doi.org/10.1109/tai.2024.3375258","title":"Online Reinforcement Learning in Periodic MDP","display_name":"Online Reinforcement Learning in Periodic MDP","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392931579","doi":"https://doi.org/10.1109/tai.2024.3375258"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2024.3375258","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2024.3375258","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013672718","display_name":"Ayush Aniket","orcid":null},"institutions":[{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Ayush Aniket","raw_affiliation_strings":["Department of Electrical Engineering, IIT Delhi, New Delhi, India"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, IIT Delhi, New Delhi, India","institution_ids":["https://openalex.org/I68891433"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086971942","display_name":"Arpan Chattopadhyay","orcid":"https://orcid.org/0000-0002-2684-5912"},"institutions":[{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Arpan Chattopadhyay","raw_affiliation_strings":["Department of Electrical Engineering, IIT Delhi, New Delhi, India"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, IIT Delhi, New Delhi, India","institution_ids":["https://openalex.org/I68891433"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5013672718"],"corresponding_institution_ids":["https://openalex.org/I68891433"],"apc_list":null,"apc_paid":null,"fwci":2.0967,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.8629718,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"5","issue":"7","first_page":"3624","last_page":"3637"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.9021999835968018,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.9021999835968018,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.707420289516449},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6243897080421448},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5279889702796936},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2963166832923889},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2623094618320465},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.09193426370620728}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.707420289516449},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6243897080421448},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5279889702796936},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2963166832923889},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2623094618320465},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.09193426370620728}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2024.3375258","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2024.3375258","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8526590997","display_name":null,"funder_award_id":"CRG/2022/003707","funder_id":"https://openalex.org/F4320334771","funder_display_name":"Science and Engineering Research Board"}],"funders":[{"id":"https://openalex.org/F4320334771","display_name":"Science and Engineering Research Board","ror":"https://ror.org/03ffdsr55"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W21934178","https://openalex.org/W1977655452","https://openalex.org/W1990667931","https://openalex.org/W2076330565","https://openalex.org/W2135829225","https://openalex.org/W2142971854","https://openalex.org/W2171092272","https://openalex.org/W2972710806","https://openalex.org/W2993208870","https://openalex.org/W3091875946","https://openalex.org/W3099984989","https://openalex.org/W3121214275","https://openalex.org/W4287633962","https://openalex.org/W4295177675","https://openalex.org/W4298023569","https://openalex.org/W6600849757","https://openalex.org/W6676833704","https://openalex.org/W6679661297","https://openalex.org/W6751346906","https://openalex.org/W6756411291","https://openalex.org/W6766799311","https://openalex.org/W6771332416","https://openalex.org/W6776719514","https://openalex.org/W6779771741","https://openalex.org/W6780394777","https://openalex.org/W6780779400","https://openalex.org/W6784134107","https://openalex.org/W6784344538","https://openalex.org/W6790330943","https://openalex.org/W6797339219"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291"],"abstract_inverted_index":{"We":[0,32,60],"study":[1],"learning":[2],"in":[3,119,146,149],"periodic":[4,52],"Markov":[5],"Decision":[6],"Process":[7],"(MDP),":[8],"a":[9,37,51,156],"special":[10],"type":[11],"of":[12,65,95,98,121,158,168],"non-stationary":[13],"MDP":[14,39],"where":[15],"both":[16,118],"the":[17,27,34,42,46,63,70,82,90,93,147,151,166,170],"state":[18,43],"transition":[19,96],"probabilities":[20],"and":[21,49,75,131,141],"reward":[22,29],"functions":[23],"vary":[24],"periodically,":[25],"under":[26],"average":[28],"maximization":[30],"setting.":[31],"formulate":[33],"problem":[35],"as":[36,76],"stationary":[38],"by":[40],"augmenting":[41],"space":[44],"with":[45,69,81,111],"period":[47,71,152],"index,":[48],"propose":[50,102,136],"upper":[53],"confidence":[54],"bound":[55],"reinforcement":[56],"learning-2":[57],"(PUCRL2)":[58],"algorithm.":[59],"show":[61],"that":[62],"regret":[64,122],"PUCRL2":[66],"varies":[67],"linearly":[68],"<italic":[72,85],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[73,78,86,125],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">N</i>":[74],"<inline-formula":[77,124],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[79,126],"notation=\"LaTeX\">$\\mathcal{O}(\\sqrt{TlogT})$</tex-math></inline-formula>":[80],"horizon":[83],"length":[84],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">T</i>":[87],".":[88],"Utilizing":[89],"information":[91],"about":[92],"sparsity":[94],"matrix":[97],"augmented":[99],"MDP,":[100],"we":[101,135],"another":[103],"algorithm":[104],"PUCRLB":[105],"(Periodic":[106],"Upper":[107],"Confidence":[108],"Reinforcement":[109],"Learning":[110],"Bernstein":[112],"bounds)":[113],"which":[114,150],"enhances":[115],"upon":[116],"PUCRL2,":[117],"terms":[120],"(":[123],"notation=\"LaTeX\">$O(\\sqrt{N})$</tex-math></inline-formula>":[127],"dependency":[128],"on":[129],"period)":[130],"empirical":[132],"performance.":[133],"Finally,":[134],"two":[137],"other":[138],"algorithms":[139],"U-PUCRL2":[140],"U-PUCRLB":[142],"for":[143],"extended":[144],"uncertainty":[145],"environment":[148],"is":[153],"unknown":[154],"but":[155],"set":[157],"candidate":[159],"periods":[160],"are":[161],"known.":[162],"Numerical":[163],"results":[164],"demonstrate":[165],"efficacy":[167],"all":[169],"algorithms.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
