{"id":"https://openalex.org/W2154190222","doi":"https://doi.org/10.1007/s10994-016-5569-5","title":"Variance-constrained actor-critic algorithms for discounted and average reward MDPs","display_name":"Variance-constrained actor-critic algorithms for discounted and average reward MDPs","publication_year":2016,"publication_date":"2016-08-05","ids":{"openalex":"https://openalex.org/W2154190222","doi":"https://doi.org/10.1007/s10994-016-5569-5","mag":"2154190222"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-016-5569-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-016-5569-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-016-5569-5.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-016-5569-5.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068379567","display_name":"L. A. Prashanth","orcid":null},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"L. A. Prashanth","raw_affiliation_strings":["Institute for Systems Research, University of Maryland, College Park, USA","Institute for Systems Research, University of Maryland,College Park,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for Systems Research, University of Maryland, College Park, USA","institution_ids":["https://openalex.org/I66946132"]},{"raw_affiliation_string":"Institute for Systems Research, University of Maryland,College Park,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013843778","display_name":"Mohammad Ghavamzadeh","orcid":"https://orcid.org/0000-0003-0930-8688"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]},{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I4210138412","display_name":"Centre Inria de l'Universit\u00e9 de Lille","ror":"https://ror.org/04eej9726","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210138412"]}],"countries":["FR","US"],"is_corresponding":false,"raw_author_name":"Mohammad Ghavamzadeh","raw_affiliation_strings":["Adobe Research, California, USA","INRIA, Lille, France","Adobe Research, California, USA and INRIA, Lille, France#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Research, California, USA","institution_ids":["https://openalex.org/I1306409833"]},{"raw_affiliation_string":"INRIA, Lille, France","institution_ids":["https://openalex.org/I4210138412","https://openalex.org/I1326498283"]},{"raw_affiliation_string":"Adobe Research, California, USA and INRIA, Lille, France#TAB#","institution_ids":["https://openalex.org/I1306409833","https://openalex.org/I1326498283"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":null,"fwci":1.7666,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.88699473,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"105","issue":"3","first_page":"367","last_page":"417"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.6925536394119263},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6265513896942139},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.5937066078186035},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5848230719566345},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5395660996437073},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.504102349281311},{"id":"https://openalex.org/keywords/perturbation","display_name":"Perturbation (astronomy)","score":0.465399831533432},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.4646453559398651},{"id":"https://openalex.org/keywords/lagrange-multiplier","display_name":"Lagrange multiplier","score":0.42395636439323425},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.41928592324256897},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3573068678379059},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.273725688457489},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.22149834036827087},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.137827068567276},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.12200424075126648},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.12043806910514832},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.1018858551979065},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.0993872880935669}],"concepts":[{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.6925536394119263},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6265513896942139},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5937066078186035},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5848230719566345},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5395660996437073},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.504102349281311},{"id":"https://openalex.org/C177918212","wikidata":"https://www.wikidata.org/wiki/Q803623","display_name":"Perturbation (astronomy)","level":2,"score":0.465399831533432},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.4646453559398651},{"id":"https://openalex.org/C73684929","wikidata":"https://www.wikidata.org/wiki/Q598870","display_name":"Lagrange multiplier","level":2,"score":0.42395636439323425},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.41928592324256897},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3573068678379059},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.273725688457489},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.22149834036827087},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.137827068567276},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.12200424075126648},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.12043806910514832},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.1018858551979065},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0993872880935669},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s10994-016-5569-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-016-5569-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-016-5569-5.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},{"id":"mag:2154190222","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/1403.6530","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null}],"best_oa_location":{"id":"doi:10.1007/s10994-016-5569-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-016-5569-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-016-5569-5.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.7200000286102295}],"awards":[{"id":"https://openalex.org/G1523888516","display_name":null,"funder_award_id":"FA9550-","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G3279563081","display_name":null,"funder_award_id":"CNS-1446665","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G3532924018","display_name":null,"funder_award_id":"FA9550-15-10050","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3724117276","display_name":"A New Approach to Nonconvex Risk-Sensitive Stochastic Optimization","funder_award_id":"1362303","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3772436243","display_name":"CPS: Breakthrough: Compositional Modeling of Cyberphysical Systems","funder_award_id":"1446665","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4030459030","display_name":null,"funder_award_id":"CMMI-1362303","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5131194708","display_name":"New Approaches for Simulation-Based Optimal Decision Making","funder_award_id":"1434419","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5254519533","display_name":null,"funder_award_id":"FA9550-15-10050","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G5809100787","display_name":null,"funder_award_id":"FA9550","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G6833004606","display_name":null,"funder_award_id":"CMMI-1434419","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8694426034","display_name":null,"funder_award_id":"CMMI-1362303","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337391","display_name":"Division of Civil, Mechanical and Manufacturing Innovation","ror":"https://ror.org/028yd4c30"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2154190222.pdf","grobid_xml":"https://content.openalex.org/works/W2154190222.grobid-xml"},"referenced_works_count":83,"referenced_works":["https://openalex.org/W89883662","https://openalex.org/W594357522","https://openalex.org/W1518931405","https://openalex.org/W1526449679","https://openalex.org/W1569296262","https://openalex.org/W1576452626","https://openalex.org/W1585575029","https://openalex.org/W1678814353","https://openalex.org/W1876490351","https://openalex.org/W1897166328","https://openalex.org/W1914389580","https://openalex.org/W1965878388","https://openalex.org/W1985077192","https://openalex.org/W1985291828","https://openalex.org/W1990437501","https://openalex.org/W2001009060","https://openalex.org/W2007382693","https://openalex.org/W2009493601","https://openalex.org/W2012117977","https://openalex.org/W2013406658","https://openalex.org/W2038398071","https://openalex.org/W2057510529","https://openalex.org/W2059984770","https://openalex.org/W2064206680","https://openalex.org/W2065134213","https://openalex.org/W2066623346","https://openalex.org/W2070570138","https://openalex.org/W2071983464","https://openalex.org/W2073314543","https://openalex.org/W2086161653","https://openalex.org/W2088413745","https://openalex.org/W2094387729","https://openalex.org/W2098432798","https://openalex.org/W2100677568","https://openalex.org/W2107431923","https://openalex.org/W2114537044","https://openalex.org/W2115020145","https://openalex.org/W2119567691","https://openalex.org/W2119717200","https://openalex.org/W2121863487","https://openalex.org/W2124289529","https://openalex.org/W2125001944","https://openalex.org/W2125852847","https://openalex.org/W2128723253","https://openalex.org/W2129462326","https://openalex.org/W2132667699","https://openalex.org/W2136602922","https://openalex.org/W2139418546","https://openalex.org/W2139914196","https://openalex.org/W2141203641","https://openalex.org/W2144446635","https://openalex.org/W2150152677","https://openalex.org/W2152790647","https://openalex.org/W2155027007","https://openalex.org/W2156737235","https://openalex.org/W2161270100","https://openalex.org/W2162849300","https://openalex.org/W2165428239","https://openalex.org/W2165622730","https://openalex.org/W2235056388","https://openalex.org/W2313791856","https://openalex.org/W2334782222","https://openalex.org/W2493209382","https://openalex.org/W2565654137","https://openalex.org/W2798766386","https://openalex.org/W2952647718","https://openalex.org/W2962951833","https://openalex.org/W3103182070","https://openalex.org/W3173138228","https://openalex.org/W4230778209","https://openalex.org/W4241584625","https://openalex.org/W4243772471","https://openalex.org/W4247305511","https://openalex.org/W6603650463","https://openalex.org/W6634528131","https://openalex.org/W6674995601","https://openalex.org/W6677916085","https://openalex.org/W6683195989","https://openalex.org/W6750230808","https://openalex.org/W6780394890","https://openalex.org/W6818605078","https://openalex.org/W6987391319","https://openalex.org/W7058505135"],"related_works":["https://openalex.org/W2405788319","https://openalex.org/W2963856199","https://openalex.org/W2156737235","https://openalex.org/W2256989395","https://openalex.org/W3128954025","https://openalex.org/W3114629890","https://openalex.org/W2192829189","https://openalex.org/W2145868638","https://openalex.org/W3120541724","https://openalex.org/W2944187456","https://openalex.org/W1512407760","https://openalex.org/W3112348608","https://openalex.org/W3183820536","https://openalex.org/W2117964625","https://openalex.org/W3106238320","https://openalex.org/W2319020649","https://openalex.org/W2963057120","https://openalex.org/W2287042886","https://openalex.org/W2803402264","https://openalex.org/W2756036450"],"abstract_inverted_index":null,"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
