{"id":"https://openalex.org/W2996383434","doi":"https://doi.org/10.1109/tac.2021.3108121","title":"Online Reinforcement Learning of Optimal Threshold Policies for Markov Decision Processes","display_name":"Online Reinforcement Learning of Optimal Threshold Policies for Markov Decision Processes","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W2996383434","doi":"https://doi.org/10.1109/tac.2021.3108121","mag":"2996383434"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2021.3108121","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2021.3108121","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1912.10325","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018946124","display_name":"Arghyadip Roy","orcid":"https://orcid.org/0000-0001-9955-9514"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Arghyadip Roy","raw_affiliation_strings":["Coordinated Science Laboratory, University of Illinois at Urbana-Champaign, Urbana-Champaign, IL, USA","[Coordinated Science Laboratory, University of Illinois at Urbana-Champaign, Urbana, USA, United States of America, 61801 (e-mail: arghyadip89@gmail.com)]"],"raw_orcid":"https://orcid.org/0000-0001-9955-9514","affiliations":[{"raw_affiliation_string":"Coordinated Science Laboratory, University of Illinois at Urbana-Champaign, Urbana-Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"[Coordinated Science Laboratory, University of Illinois at Urbana-Champaign, Urbana, USA, United States of America, 61801 (e-mail: arghyadip89@gmail.com)]","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018541798","display_name":"Vivek S. Borkar","orcid":"https://orcid.org/0000-0003-0756-5402"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vivek Borkar","raw_affiliation_strings":["Department of Electrical Engineering, Indian Institute of Technology Bombay, Mumbai, India","[Department of Electrical Engineering, Indian Institute of Technology, Mumbai, Maharashtra, India, 400076 (e-mail: borkar.vs@gmail.com)]"],"raw_orcid":"https://orcid.org/0000-0003-0756-5402","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Indian Institute of Technology Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]},{"raw_affiliation_string":"[Department of Electrical Engineering, Indian Institute of Technology, Mumbai, Maharashtra, India, 400076 (e-mail: borkar.vs@gmail.com)]","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018587336","display_name":"Abhay Karandikar","orcid":"https://orcid.org/0000-0003-1608-3413"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]},{"id":"https://openalex.org/I94234084","display_name":"Indian Institute of Technology Kanpur","ror":"https://ror.org/05pjsgx75","country_code":"IN","type":"education","lineage":["https://openalex.org/I94234084"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Abhay Karandikar","raw_affiliation_strings":["Director, Indian Institute of Technology Kanpur (on leave from Department of Electrical Engineering, IIT Bombay, Mumbai 400076, India), Kanpur, India","[Electrical Engineering, Indian Institute of Technology Bombay, Mumbai, India, 400076 (e-mail: karandi@ee.iitb.ac.in)]"],"raw_orcid":"https://orcid.org/0000-0003-1608-3413","affiliations":[{"raw_affiliation_string":"Director, Indian Institute of Technology Kanpur (on leave from Department of Electrical Engineering, IIT Bombay, Mumbai 400076, India), Kanpur, India","institution_ids":["https://openalex.org/I94234084","https://openalex.org/I162827531"]},{"raw_affiliation_string":"[Electrical Engineering, Indian Institute of Technology Bombay, Mumbai, India, 400076 (e-mail: karandi@ee.iitb.ac.in)]","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034853149","display_name":"Prasanna Chaporkar","orcid":"https://orcid.org/0000-0001-5082-0179"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Prasanna Chaporkar","raw_affiliation_strings":["Department of Electrical Engineering, Indian Institute of Technology Bombay, Mumbai, India","[Department of Electrical Engineering, Indian Institute of Technology Bombay, Mumbai, India, 400076 (e-mail: chaporkar@ee.iitb.ac.in)]"],"raw_orcid":"https://orcid.org/0000-0001-5082-0179","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Indian Institute of Technology Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]},{"raw_affiliation_string":"[Department of Electrical Engineering, Indian Institute of Technology Bombay, Mumbai, India, 400076 (e-mail: chaporkar@ee.iitb.ac.in)]","institution_ids":["https://openalex.org/I162827531"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5018946124"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":0.6999,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.7494494,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"67","issue":"7","first_page":"3722","last_page":"3729"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.9254450798034668},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8764433860778809},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.7133852243423462},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.707091748714447},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.6481888890266418},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6117784380912781},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6007907390594482},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5658718943595886},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.48795264959335327},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.4462243914604187},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.422183096408844},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3255578279495239},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2811499834060669},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2371029257774353},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18754053115844727}],"concepts":[{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.9254450798034668},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8764433860778809},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.7133852243423462},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.707091748714447},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.6481888890266418},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6117784380912781},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6007907390594482},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5658718943595886},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.48795264959335327},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.4462243914604187},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.422183096408844},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3255578279495239},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2811499834060669},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2371029257774353},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18754053115844727},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/tac.2021.3108121","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2021.3108121","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1912.10325","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1912.10325","pdf_url":"https://arxiv.org/pdf/1912.10325","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2996383434","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1912.10325","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1912.10325","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1912.10325","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1912.10325","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1912.10325","pdf_url":"https://arxiv.org/pdf/1912.10325","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.46000000834465027,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321027","display_name":"Indo-French Centre for the Promotion of Advanced Research","ror":"https://ror.org/017aem598"},{"id":"https://openalex.org/F4320325255","display_name":"Ministry of Electronics and Information technology","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2996383434.pdf","grobid_xml":"https://content.openalex.org/works/W2996383434.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W594357522","https://openalex.org/W1500945877","https://openalex.org/W1601081659","https://openalex.org/W1980922866","https://openalex.org/W1981604825","https://openalex.org/W2021441076","https://openalex.org/W2068199252","https://openalex.org/W2070570138","https://openalex.org/W2071983464","https://openalex.org/W2078607712","https://openalex.org/W2082261506","https://openalex.org/W2082607057","https://openalex.org/W2098432798","https://openalex.org/W2098836038","https://openalex.org/W2102195169","https://openalex.org/W2110621360","https://openalex.org/W2119567691","https://openalex.org/W2120465407","https://openalex.org/W2121863487","https://openalex.org/W2124715093","https://openalex.org/W2138410336","https://openalex.org/W2138717292","https://openalex.org/W2153919611","https://openalex.org/W2154204727","https://openalex.org/W2155027007","https://openalex.org/W2167641136","https://openalex.org/W2334782222","https://openalex.org/W2341171179","https://openalex.org/W2791825310","https://openalex.org/W2883087722","https://openalex.org/W2884466981","https://openalex.org/W2902806543","https://openalex.org/W2908671941","https://openalex.org/W2964273152","https://openalex.org/W2981289360","https://openalex.org/W2986615937","https://openalex.org/W3006109470","https://openalex.org/W3008988392","https://openalex.org/W3033246677","https://openalex.org/W3198564127","https://openalex.org/W4213251304","https://openalex.org/W4214717370","https://openalex.org/W4243772471","https://openalex.org/W4300723704","https://openalex.org/W6683204974","https://openalex.org/W6752725515","https://openalex.org/W6769500229","https://openalex.org/W6779184107"],"related_works":["https://openalex.org/W2953183007","https://openalex.org/W3198564127","https://openalex.org/W2150339816","https://openalex.org/W2512014291","https://openalex.org/W2154623234","https://openalex.org/W2759520088","https://openalex.org/W2160067530","https://openalex.org/W3197469660","https://openalex.org/W1999784270","https://openalex.org/W2970870329","https://openalex.org/W307767029","https://openalex.org/W1851714595","https://openalex.org/W2276878381","https://openalex.org/W189510620","https://openalex.org/W3046384803","https://openalex.org/W1990538571","https://openalex.org/W2964340170","https://openalex.org/W3159988179","https://openalex.org/W59916768","https://openalex.org/W3198851288"],"abstract_inverted_index":{"To":[0],"overcome":[1],"the":[2,37,41,52,57,64,68,72,77,80,83,102],"<italic":[3],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[4],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">curses":[5],"of":[6,10,40,56,67],"dimensionality":[7],"and":[8,91],"modeling</i>":[9],"dynamic":[11],"programming":[12],"methods":[13,23],"to":[14,29,50,71,76],"solve":[15],"Markov":[16],"decision":[17],"process":[18],"problems,":[19],"reinforcement":[20],"learning":[21,48],"(RL)":[22],"are":[24],"adopted":[25],"in":[26,79,89],"practice.":[27],"Contrary":[28],"traditional":[30],"RL":[31,96,109],"algorithms,":[32],"which":[33],"do":[34],"not":[35],"consider":[36],"structural":[38],"properties":[39],"optimal":[42,58,73],"policy,":[43,59],"we":[44],"propose":[45],"a":[46],"structure-aware":[47],"algorithm":[49,70,85,104],"exploit":[51],"ordered":[53],"multithreshold":[54],"structure":[55],"if":[60],"any.":[61],"We":[62],"prove":[63],"asymptotic":[65],"convergence":[66],"proposed":[69,84,103],"policy.":[74],"Due":[75],"reduction":[78],"policy":[81],"space,":[82],"provides":[86],"remarkable":[87],"improvements":[88],"storage":[90],"computational":[92],"complexities":[93],"over":[94],"classical":[95],"algorithms.":[97,110],"Simulation":[98],"results":[99],"establish":[100],"that":[101],"converges":[105],"faster":[106],"than":[107],"other":[108]},"counts_by_year":[{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
