{"id":"https://openalex.org/W3086587008","doi":"https://doi.org/10.1109/cdc42340.2020.9303956","title":"Lower Bounds for Policy Iteration on Multi-action MDPs","display_name":"Lower Bounds for Policy Iteration on Multi-action MDPs","publication_year":2020,"publication_date":"2020-12-14","ids":{"openalex":"https://openalex.org/W3086587008","doi":"https://doi.org/10.1109/cdc42340.2020.9303956","mag":"3086587008"},"language":"en","primary_location":{"id":"doi:10.1109/cdc42340.2020.9303956","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc42340.2020.9303956","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 59th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2009.07842","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103876791","display_name":"Kumar Ashutosh","orcid":null},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Kumar Ashutosh","raw_affiliation_strings":["Indian Institute of Technology Bombay, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040738554","display_name":"Sarthak Consul","orcid":null},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sarthak Consul","raw_affiliation_strings":["Indian Institute of Technology Bombay, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042238232","display_name":"Bhishma Dedhia","orcid":"https://orcid.org/0000-0001-8260-282X"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Bhishma Dedhia","raw_affiliation_strings":["Indian Institute of Technology Bombay, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048629408","display_name":"Parthasarathi Khirwadkar","orcid":null},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Parthasarathi Khirwadkar","raw_affiliation_strings":["Indian Institute of Technology Bombay, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055516208","display_name":"Sahil Shah","orcid":"https://orcid.org/0000-0001-5972-3199"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sahil Shah","raw_affiliation_strings":["Indian Institute of Technology Bombay, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038200034","display_name":"Shivaram Kalyanakrishnan","orcid":null},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shivaram Kalyanakrishnan","raw_affiliation_strings":["Indian Institute of Technology Bombay, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103876791"],"corresponding_institution_ids":["https://openalex.org/I162827531"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.10793154,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1744","last_page":"1749"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.7900696992874146},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6781290173530579},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.5946836471557617},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.5485712289810181},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5432776212692261},{"id":"https://openalex.org/keywords/omega","display_name":"Omega","score":0.5341087579727173},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5308046936988831},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.49764779210090637},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.43938931822776794},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.4275510013103485},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4219065010547638},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.34744393825531006},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3450203537940979},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.30222615599632263},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.08677941560745239},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.06512337923049927}],"concepts":[{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.7900696992874146},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6781290173530579},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.5946836471557617},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5485712289810181},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5432776212692261},{"id":"https://openalex.org/C2779557605","wikidata":"https://www.wikidata.org/wiki/Q9890","display_name":"Omega","level":2,"score":0.5341087579727173},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5308046936988831},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.49764779210090637},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.43938931822776794},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.4275510013103485},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4219065010547638},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.34744393825531006},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3450203537940979},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.30222615599632263},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.08677941560745239},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.06512337923049927},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/cdc42340.2020.9303956","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc42340.2020.9303956","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 59th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2009.07842","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2009.07842","pdf_url":"https://arxiv.org/pdf/2009.07842","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3086587008","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2009.07842.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2009.07842","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2009.07842","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2009.07842","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2009.07842","pdf_url":"https://arxiv.org/pdf/2009.07842","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7699999809265137}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334771","display_name":"Science and Engineering Research Board","ror":"https://ror.org/03ffdsr55"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3086587008.pdf","grobid_xml":"https://content.openalex.org/works/W3086587008.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W1494380152","https://openalex.org/W1522772123","https://openalex.org/W1598325870","https://openalex.org/W1928484725","https://openalex.org/W2028145673","https://openalex.org/W2031259314","https://openalex.org/W2101915445","https://openalex.org/W2162425318","https://openalex.org/W2168024904","https://openalex.org/W2334782222","https://openalex.org/W2341171179","https://openalex.org/W2562620744","https://openalex.org/W2966177666","https://openalex.org/W6629437211","https://openalex.org/W6684620542","https://openalex.org/W6730563522","https://openalex.org/W6766450200"],"related_works":["https://openalex.org/W3092133906","https://openalex.org/W2805379554","https://openalex.org/W2950049300","https://openalex.org/W3168267598","https://openalex.org/W1969276875","https://openalex.org/W3176697953","https://openalex.org/W2256738662","https://openalex.org/W2951785836","https://openalex.org/W2949670066","https://openalex.org/W2140055897","https://openalex.org/W2177060007","https://openalex.org/W50486269","https://openalex.org/W2041927040","https://openalex.org/W3046724194","https://openalex.org/W2580903355","https://openalex.org/W2951753316","https://openalex.org/W1526138092","https://openalex.org/W2406356034","https://openalex.org/W1915973093","https://openalex.org/W2116708838"],"abstract_inverted_index":{"Policy":[0],"Iteration":[1],"(PI)":[2],"is":[3,26,49,67,142],"a":[4,71,80,144,168],"classical":[5],"family":[6],"of":[7,53,82,85,91,126,147,170,177],"algorithms":[8],"to":[9,27,34,39,77,153,163],"compute":[10],"an":[11,42,46],"optimal":[12,47],"policy":[13,32,38,48],"for":[14,61,135,172,182],"any":[15],"given":[16],"Markov":[17],"Decision":[18],"Problem":[19],"(MDP).":[20],"The":[21],"basic":[22],"idea":[23],"in":[24,94],"PI":[25,54,73,148],"begin":[28],"with":[29],"some":[30,173],"initial":[31],"and":[33,88,179],"repeatedly":[35],"update":[36],"the":[37,57,83,89,95,123],"one":[40],"from":[41,56],"improving":[43],"set,":[44],"until":[45],"reached.":[50],"Different":[51],"variants":[52,176],"result":[55,141],"(switching)":[58],"rule":[59],"used":[60],"improvement.":[62],"An":[63],"important":[64],"theoretical":[65],"question":[66],"how":[68],"many":[69],"iterations":[70,152],"specified":[72],"variant":[74,146],"will":[75],"take":[76,150],"terminate":[78],"as":[79],"function":[81],"number":[84,90],"states":[86],"$n$":[87],"actions":[92],"$k$":[93,171],"input":[96],"MDP.":[97],"While":[98],"there":[99,108],"has":[100],"been":[101],"considerable":[102],"progress":[103],"towards":[104],"upper-bounding":[105],"this":[106],"number,":[107],"are":[109],"fewer":[110],"results":[111],"on":[112,122,160],"lower":[113,118,133,165],"bounds.":[114],"In":[115],"particular,":[116],"existing":[117,158],"bounds":[119,134,166],"primarily":[120],"focus":[121],"special":[124],"case":[125],"$k":[127,136],"=":[128],"2$":[129],"actions.":[130],"We":[131,155],"devise":[132],"\\geq":[137],"3$.":[138],"Our":[139],"main":[140],"that":[143],"particular":[145],"can":[149],"$\u03a9(k^{n/2})$":[151],"terminate.":[154],"also":[156],"generalise":[157],"constructions":[159],"$2$-action":[161],"MDPs":[162],"scale":[164],"by":[167,180],"factor":[169],"common":[174],"deterministic":[175],"PI,":[178],"$\\log(k)$":[181],"corresponding":[183],"randomised":[184],"variants.":[185]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
