{"id":"https://openalex.org/W2525573332","doi":"https://doi.org/10.1109/tac.2015.2497904","title":"Finding Optimal Observation-Based Policies for Constrained POMDPs Under the Expected Average Reward Criterion","display_name":"Finding Optimal Observation-Based Policies for Constrained POMDPs Under the Expected Average Reward Criterion","publication_year":2015,"publication_date":"2015-11-04","ids":{"openalex":"https://openalex.org/W2525573332","doi":"https://doi.org/10.1109/tac.2015.2497904","mag":"2525573332"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2015.2497904","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2015.2497904","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088416755","display_name":"Xiaofeng Jiang","orcid":"https://orcid.org/0000-0001-7595-2397"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaofeng Jiang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069802328","display_name":"Hongsheng Xi","orcid":"https://orcid.org/0000-0002-5747-9732"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongsheng Xi","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100382658","display_name":"Xiaodong Wang","orcid":"https://orcid.org/0000-0002-2945-9240"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaodong Wang","raw_affiliation_strings":["Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046052004","display_name":"Falin Liu","orcid":"https://orcid.org/0000-0001-6815-7133"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Falin Liu","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5088416755"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":1.0098,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.81364948,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"61","issue":"10","first_page":"3070","last_page":"3075"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7120274901390076},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6777392029762268},{"id":"https://openalex.org/keywords/ergodic-theory","display_name":"Ergodic theory","score":0.5658432841300964},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.542761504650116},{"id":"https://openalex.org/keywords/partially-observable-markov-decision-process","display_name":"Partially observable Markov decision process","score":0.5245733261108398},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5050213932991028},{"id":"https://openalex.org/keywords/observable","display_name":"Observable","score":0.4591917097568512},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.45472609996795654},{"id":"https://openalex.org/keywords/imperfect","display_name":"Imperfect","score":0.44239935278892517},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.44059431552886963},{"id":"https://openalex.org/keywords/basis","display_name":"Basis (linear algebra)","score":0.4402036964893341},{"id":"https://openalex.org/keywords/perfect-information","display_name":"Perfect information","score":0.41629183292388916},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4102785587310791},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2293291687965393},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.09202691912651062},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.08630797266960144}],"concepts":[{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7120274901390076},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6777392029762268},{"id":"https://openalex.org/C122044880","wikidata":"https://www.wikidata.org/wiki/Q5498822","display_name":"Ergodic theory","level":2,"score":0.5658432841300964},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.542761504650116},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.5245733261108398},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.5050213932991028},{"id":"https://openalex.org/C32848918","wikidata":"https://www.wikidata.org/wiki/Q845789","display_name":"Observable","level":2,"score":0.4591917097568512},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.45472609996795654},{"id":"https://openalex.org/C2780310539","wikidata":"https://www.wikidata.org/wiki/Q12547192","display_name":"Imperfect","level":2,"score":0.44239935278892517},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.44059431552886963},{"id":"https://openalex.org/C12426560","wikidata":"https://www.wikidata.org/wiki/Q189569","display_name":"Basis (linear algebra)","level":2,"score":0.4402036964893341},{"id":"https://openalex.org/C123676819","wikidata":"https://www.wikidata.org/wiki/Q1074338","display_name":"Perfect information","level":2,"score":0.41629183292388916},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4102785587310791},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2293291687965393},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.09202691912651062},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.08630797266960144},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tac.2015.2497904","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2015.2497904","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.800000011920929}],"awards":[{"id":"https://openalex.org/G5156508193","display_name":null,"funder_award_id":"61503358","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6392292501","display_name":null,"funder_award_id":"61233003","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8611069936","display_name":null,"funder_award_id":"2014M561839","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W434069379","https://openalex.org/W1006554781","https://openalex.org/W1503109067","https://openalex.org/W1555477527","https://openalex.org/W1994616650","https://openalex.org/W2034725503","https://openalex.org/W2055418958","https://openalex.org/W2096594845","https://openalex.org/W2099430963","https://openalex.org/W2102162493","https://openalex.org/W2110688863","https://openalex.org/W2119852681","https://openalex.org/W2120465407","https://openalex.org/W2156676156","https://openalex.org/W2171033527","https://openalex.org/W2484957131","https://openalex.org/W2611866857","https://openalex.org/W3146520007","https://openalex.org/W6629917913","https://openalex.org/W6633295396","https://openalex.org/W6678181239"],"related_works":["https://openalex.org/W2096013579","https://openalex.org/W1760611253","https://openalex.org/W52153049","https://openalex.org/W1589140671","https://openalex.org/W2951545791","https://openalex.org/W1515117609","https://openalex.org/W2294884454","https://openalex.org/W4323315247","https://openalex.org/W3169161914","https://openalex.org/W2360321812"],"abstract_inverted_index":{"In":[0],"this":[1],"technical":[2],"note,":[3],"constrained":[4],"partially":[5],"observable":[6],"Markov":[7,73],"decision":[8],"processes":[9],"with":[10,75,89],"discrete":[11],"state":[12,78],"and":[13,65],"action":[14],"spaces":[15],"under":[16],"the":[17,31,44,49,70,76,87,99,102],"average":[18],"reward":[19],"criterion":[20],"are":[21],"studied":[22],"from":[23],"a":[24,38,52],"sensitivity":[25],"point":[26],"of":[27,33,51,101],"view.":[28],"By":[29],"analyzing":[30],"derivatives":[32],"performance":[34,81],"criteria,":[35],"we":[36],"develop":[37],"simulation-based":[39],"optimization":[40],"algorithm":[41,57],"to":[42,69,84,86,97],"find":[43],"optimal":[45],"observation-based":[46],"policy":[47],"on":[48],"basis":[50],"single":[53],"sample":[54],"path.":[55],"This":[56],"does":[58],"not":[59],"need":[60],"any":[61],"overly":[62],"strict":[63],"assumption":[64],"can":[66],"be":[67],"applied":[68],"general":[71],"ergodic":[72],"systems":[74],"imperfect":[77],"information.":[79],"The":[80],"is":[82,95],"proved":[83],"converge":[85],"optimum":[88],"probability":[90],"1.":[91],"One":[92],"numerical":[93],"example":[94],"provided":[96],"illustrate":[98],"applicability":[100],"algorithm.":[103]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
