{"id":"https://openalex.org/W4413267723","doi":"https://doi.org/10.1109/tac.2025.3593328","title":"An Actor\u2013Critic Algorithm With Function Approximation for Risk Sensitive Cost Markov Decision Processes","display_name":"An Actor\u2013Critic Algorithm With Function Approximation for Risk Sensitive Cost Markov Decision Processes","publication_year":2025,"publication_date":"2025-07-28","ids":{"openalex":"https://openalex.org/W4413267723","doi":"https://doi.org/10.1109/tac.2025.3593328"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2025.3593328","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2025.3593328","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011056251","display_name":"Soumyajit Guin","orcid":"https://orcid.org/0000-0003-2327-3549"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Soumyajit Guin","raw_affiliation_strings":["Department of Computer Science and Automation, Indian Institute of Science, Bengaluru, India","Department of Computer Science and Automation, Indian Institute of Science, Bengaluru, Karnataka, India"],"raw_orcid":"https://orcid.org/0000-0003-2327-3549","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Automation, Indian Institute of Science, Bengaluru, India","institution_ids":["https://openalex.org/I59270414"]},{"raw_affiliation_string":"Department of Computer Science and Automation, Indian Institute of Science, Bengaluru, Karnataka, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018541798","display_name":"Vivek S. Borkar","orcid":"https://orcid.org/0000-0003-0756-5402"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vivek S. Borkar","raw_affiliation_strings":["Department of Electrical Engineering, Indian Institute of Technology Bombay, Mumbai, India","Department of Electrical Engineering, Indian Institute of Technology, Bombay, Mumbai, India"],"raw_orcid":"https://orcid.org/0000-0003-0756-5402","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Indian Institute of Technology Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]},{"raw_affiliation_string":"Department of Electrical Engineering, Indian Institute of Technology, Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038163398","display_name":"Shalabh Bhatnagar","orcid":"https://orcid.org/0000-0001-7644-3914"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shalabh Bhatnagar","raw_affiliation_strings":["Department of Computer Science and Automation, Indian Institute of Science, Bengaluru, India","Department of Computer Science and Automation, Indian Institute of Science, Bengaluru, Karnataka, India"],"raw_orcid":"https://orcid.org/0000-0001-7644-3914","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Automation, Indian Institute of Science, Bengaluru, India","institution_ids":["https://openalex.org/I59270414"]},{"raw_affiliation_string":"Department of Computer Science and Automation, Indian Institute of Science, Bengaluru, Karnataka, India","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5011056251"],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":1.8773,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.87641084,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"71","issue":"1","first_page":"474","last_page":"481"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11810","display_name":"Complex Systems and Decision Making","score":0.5062999725341797,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11810","display_name":"Complex Systems and Decision Making","score":0.5062999725341797,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6905633211135864},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.550002932548523},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5035180449485779},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.5019733905792236},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.49214285612106323},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4885600507259369},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4547310769557953},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.434237539768219},{"id":"https://openalex.org/keywords/decision-theory","display_name":"Decision theory","score":0.4129425585269928},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.27963292598724365},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.24654895067214966},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.09005424380302429}],"concepts":[{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6905633211135864},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.550002932548523},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.5035180449485779},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.5019733905792236},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.49214285612106323},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4885600507259369},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4547310769557953},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.434237539768219},{"id":"https://openalex.org/C28901747","wikidata":"https://www.wikidata.org/wiki/Q177571","display_name":"Decision theory","level":2,"score":0.4129425585269928},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27963292598724365},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.24654895067214966},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.09005424380302429},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tac.2025.3593328","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2025.3593328","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5799999833106995,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G5673237317","display_name":null,"funder_award_id":"DFTM/02/3125/M/04/AIR-0","funder_id":"https://openalex.org/F4320321024","funder_display_name":"Defence Research and Development Organisation"}],"funders":[{"id":"https://openalex.org/F4320321024","display_name":"Defence Research and Development Organisation","ror":"https://ror.org/05k37v296"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1646707810","https://openalex.org/W1990437501","https://openalex.org/W2009303086","https://openalex.org/W2013406658","https://openalex.org/W2019291268","https://openalex.org/W2070443156","https://openalex.org/W2071983464","https://openalex.org/W2075268401","https://openalex.org/W2080631849","https://openalex.org/W2086304253","https://openalex.org/W2094387729","https://openalex.org/W2132775962","https://openalex.org/W2139418546","https://openalex.org/W2139914196","https://openalex.org/W2154204727","https://openalex.org/W2158732749","https://openalex.org/W2235056388","https://openalex.org/W2481117679","https://openalex.org/W2493209382","https://openalex.org/W3003521509","https://openalex.org/W4392646050","https://openalex.org/W4413267723"],"related_works":["https://openalex.org/W2379651310","https://openalex.org/W2113019827","https://openalex.org/W1541249122","https://openalex.org/W2084326697","https://openalex.org/W2027903142","https://openalex.org/W2354322608","https://openalex.org/W2804608325","https://openalex.org/W2077211377","https://openalex.org/W2413828414","https://openalex.org/W2186675474"],"abstract_inverted_index":{"In":[0],"this":[1,24,66],"paper,":[2],"we":[3],"consider":[4],"the":[5,36,45,49,53,77,84,96],"risk-sensitive":[6,37],"cost":[7,28,38],"criterion":[8,39],"with":[9,62],"exponentiated":[10],"costs":[11],"for":[12],"Markov":[13],"decision":[14],"processes":[15],"and":[16,68],"develop":[17,58],"a":[18],"model-free":[19],"policy":[20],"gradient":[21],"algorithm":[22,61,90],"in":[23,65,86,95],"setting.":[25],"Unlike":[26],"additive":[27],"criteria":[29],"such":[30],"as":[31],"average":[32],"or":[33],"discounted":[34],"cost,":[35],"is":[40],"less":[41],"studied":[42],"due":[43],"to":[44],"complexity":[46],"resulting":[47,54],"from":[48],"multiplicative":[50],"structure":[51],"of":[52,79,88],"Bellman":[55],"equation.":[56],"We":[57,74],"an":[59],"actor-critic":[60],"function":[63],"approximation":[64],"setting":[67],"provide":[69],"its":[70],"asymptotic":[71],"convergence":[72],"analysis.":[73],"also":[75],"show":[76],"results":[78],"numerical":[80],"experiments":[81],"that":[82],"demonstrate":[83],"superiority":[85],"performance":[87],"our":[89],"over":[91],"other":[92],"recent":[93],"algorithms":[94],"literature.":[97]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-31T23:11:33.660297","created_date":"2025-10-10T00:00:00"}
