{"id":"https://openalex.org/W3172265509","doi":"https://doi.org/10.1007/978-3-030-81688-9_30","title":"Model-Free Reinforcement Learning for Branching Markov Decision Processes","display_name":"Model-Free Reinforcement Learning for Branching Markov Decision Processes","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3172265509","doi":"https://doi.org/10.1007/978-3-030-81688-9_30","mag":"3172265509"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-030-81688-9_30","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-030-81688-9_30","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-030-81688-9_30.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/978-3-030-81688-9_30.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030417706","display_name":"Ernst Moritz Hahn","orcid":"https://orcid.org/0000-0002-9348-7684"},"institutions":[{"id":"https://openalex.org/I94624287","display_name":"University of Twente","ror":"https://ror.org/006hf6230","country_code":"NL","type":"education","lineage":["https://openalex.org/I94624287"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Ernst Moritz Hahn","raw_affiliation_strings":["University of Twente, Enschede, The Netherlands","University of Twente  Enschede The Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Twente, Enschede, The Netherlands","institution_ids":["https://openalex.org/I94624287"]},{"raw_affiliation_string":"University of Twente  Enschede The Netherlands","institution_ids":["https://openalex.org/I94624287"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084221298","display_name":"Mateo Perez","orcid":"https://orcid.org/0000-0003-4220-3212"},"institutions":[{"id":"https://openalex.org/I188538660","display_name":"University of Colorado Boulder","ror":"https://ror.org/02ttsq026","country_code":"US","type":"education","lineage":["https://openalex.org/I188538660"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mateo Perez","raw_affiliation_strings":["University of Colorado Boulder, Boulder, USA","University of Colorado \u2014 Boulder, Boulder, USA"],"affiliations":[{"raw_affiliation_string":"University of Colorado Boulder, Boulder, USA","institution_ids":["https://openalex.org/I188538660"]},{"raw_affiliation_string":"University of Colorado \u2014 Boulder, Boulder, USA","institution_ids":["https://openalex.org/I188538660"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041836791","display_name":"Sven Schewe","orcid":"https://orcid.org/0000-0002-9093-9518"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sven Schewe","raw_affiliation_strings":["University of Liverpool, Liverpool, UK","university of liverpool"],"affiliations":[{"raw_affiliation_string":"University of Liverpool, Liverpool, UK","institution_ids":["https://openalex.org/I146655781"]},{"raw_affiliation_string":"university of liverpool","institution_ids":["https://openalex.org/I146655781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077549627","display_name":"Fabio Somenzi","orcid":"https://orcid.org/0000-0002-2085-2003"},"institutions":[{"id":"https://openalex.org/I188538660","display_name":"University of Colorado Boulder","ror":"https://ror.org/02ttsq026","country_code":"US","type":"education","lineage":["https://openalex.org/I188538660"]},{"id":"https://openalex.org/I2802236040","display_name":"University of Colorado System","ror":"https://ror.org/00jc20583","country_code":"US","type":"education","lineage":["https://openalex.org/I2802236040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fabio Somenzi","raw_affiliation_strings":["University of Colorado Boulder, Boulder, USA","University of Colorado Boulder"],"affiliations":[{"raw_affiliation_string":"University of Colorado Boulder, Boulder, USA","institution_ids":["https://openalex.org/I188538660"]},{"raw_affiliation_string":"University of Colorado Boulder","institution_ids":["https://openalex.org/I2802236040","https://openalex.org/I188538660"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020302140","display_name":"Ashutosh Trivedi","orcid":"https://orcid.org/0000-0001-9346-0126"},"institutions":[{"id":"https://openalex.org/I188538660","display_name":"University of Colorado Boulder","ror":"https://ror.org/02ttsq026","country_code":"US","type":"education","lineage":["https://openalex.org/I188538660"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ashutosh Trivedi","raw_affiliation_strings":["University of Colorado Boulder, Boulder, USA","University of Colorado \u2014 Boulder, Boulder, USA"],"affiliations":[{"raw_affiliation_string":"University of Colorado Boulder, Boulder, USA","institution_ids":["https://openalex.org/I188538660"]},{"raw_affiliation_string":"University of Colorado \u2014 Boulder, Boulder, USA","institution_ids":["https://openalex.org/I188538660"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001234060","display_name":"Dominik Wojtczak","orcid":"https://orcid.org/0000-0001-5560-0546"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Dominik Wojtczak","raw_affiliation_strings":["University of Liverpool, Liverpool, UK","university of liverpool"],"affiliations":[{"raw_affiliation_string":"University of Liverpool, Liverpool, UK","institution_ids":["https://openalex.org/I146655781"]},{"raw_affiliation_string":"university of liverpool","institution_ids":["https://openalex.org/I146655781"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5030417706"],"corresponding_institution_ids":["https://openalex.org/I94624287"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":{"value":5000,"currency":"EUR","value_usd":5392},"fwci":0.0,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.1057791,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"651","last_page":"673"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10621","display_name":"Gene Regulatory Network Analysis","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.852148175239563},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8096174001693726},{"id":"https://openalex.org/keywords/stochastic-game","display_name":"Stochastic game","score":0.7472468614578247},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.7238109111785889},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6980060935020447},{"id":"https://openalex.org/keywords/branching","display_name":"Branching (polymer chemistry)","score":0.62422776222229},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5452864170074463},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.49636727571487427},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.4520435929298401},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4167943298816681},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4133645296096802},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3871883451938629},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3547047972679138},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3053782284259796},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21475735306739807},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.10433617234230042},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08727362751960754},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07009533047676086}],"concepts":[{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.852148175239563},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8096174001693726},{"id":"https://openalex.org/C22171661","wikidata":"https://www.wikidata.org/wiki/Q1074380","display_name":"Stochastic game","level":2,"score":0.7472468614578247},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.7238109111785889},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6980060935020447},{"id":"https://openalex.org/C206175624","wikidata":"https://www.wikidata.org/wiki/Q595731","display_name":"Branching (polymer chemistry)","level":2,"score":0.62422776222229},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5452864170074463},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.49636727571487427},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.4520435929298401},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4167943298816681},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4133645296096802},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3871883451938629},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3547047972679138},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3053782284259796},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21475735306739807},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.10433617234230042},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08727362751960754},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07009533047676086},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1007/978-3-030-81688-9_30","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-030-81688-9_30","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-030-81688-9_30.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},{"id":"pmh:oai:ris.utwente.nl:openaire_cris_publications/4352f483-2620-42d0-b04c-890952ca55fd","is_oa":true,"landing_page_url":"https://research.utwente.nl/en/publications/4352f483-2620-42d0-b04c-890952ca55fd","pdf_url":null,"source":{"id":"https://openalex.org/S4406922991","display_name":"University of Twente Research Information","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Hahn, E M, Perez, M, Schewe, S, Somenzi, F, Trivedi, A & Wojtczak, D 2021, Model-Free Reinforcement Learning for Branching Markov Decision Processes. in A Silva & K R M Leino (eds), Computer Aided Verification - 33rd International Conference, CAV 2021, Virtual Event, July 20-23, 2021, Proceedings, Part II. Lecture Notes in Computer Science, vol. 12760, Springer, pp. 651-673, 33rd International Conference on Computer Aided Verification, CAV 2021, Virtual Event, 20/07/21. https://doi.org/10.1007/978-3-030-81688-9_30","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:arXiv.org:2106.06777","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.06777","pdf_url":"https://arxiv.org/pdf/2106.06777","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3172265509","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2106.06777.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2106.06777","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.06777","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/978-3-030-81688-9_30","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-030-81688-9_30","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-030-81688-9_30.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.800000011920929,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G1934935867","display_name":null,"funder_award_id":"Engineering and Physical Sciences R","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3795356847","display_name":"Synthesising Game Solving Techniques","funder_award_id":"101032464","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4956428346","display_name":null,"funder_award_id":"Horizon 2020 research and innovatio","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4968910455","display_name":"Solving Parity Games in Theory and Practice","funder_award_id":"EP/P020909/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5036817778","display_name":null,"funder_award_id":"European Union's Horizon 2020 research and innov","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5399556803","display_name":null,"funder_award_id":"This project has received funding from the Europea","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G6332253830","display_name":"SHF: Small: Omega-Regular Objectives for Model-Free Reinforcement Learning","funder_award_id":"2009022","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7095971100","display_name":"NER:  Novel Nanocomposites Using Controlled Dynamics Of Interfacial Processes","funder_award_id":"0103246","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7815803692","display_name":null,"funder_award_id":"EP/P020909/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8452545418","display_name":null,"funder_award_id":"unknown","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8491077145","display_name":"Integrating Safety and Cybersecurity through Stochastic Model Checking","funder_award_id":"864075","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8633428685","display_name":null,"funder_award_id":"European Union's Horizon 2020 research and innovat","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8719353587","display_name":null,"funder_award_id":"EP/P0","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3172265509.pdf","grobid_xml":"https://content.openalex.org/works/W3172265509.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W158955491","https://openalex.org/W567049642","https://openalex.org/W832026039","https://openalex.org/W1038051412","https://openalex.org/W1523572153","https://openalex.org/W1862398452","https://openalex.org/W2005059149","https://openalex.org/W2015773902","https://openalex.org/W2028099358","https://openalex.org/W2055776578","https://openalex.org/W2066594542","https://openalex.org/W2098435584","https://openalex.org/W2118686230","https://openalex.org/W2119567691","https://openalex.org/W2143984458","https://openalex.org/W2153399290","https://openalex.org/W2159539059","https://openalex.org/W2165773105","https://openalex.org/W2220995138","https://openalex.org/W2329517595","https://openalex.org/W2334782222","https://openalex.org/W2490015446","https://openalex.org/W2568511220","https://openalex.org/W2790209383","https://openalex.org/W2897055411","https://openalex.org/W2907632475","https://openalex.org/W2993580538","https://openalex.org/W4235035462","https://openalex.org/W4297962570"],"related_works":["https://openalex.org/W3184305164","https://openalex.org/W2236968622","https://openalex.org/W2144794447","https://openalex.org/W50090763","https://openalex.org/W2983179522","https://openalex.org/W2149126181","https://openalex.org/W47969833","https://openalex.org/W2970309030","https://openalex.org/W2142410606","https://openalex.org/W3204750342","https://openalex.org/W1552684655","https://openalex.org/W2164397622","https://openalex.org/W2104130615","https://openalex.org/W1548889916","https://openalex.org/W2401977081","https://openalex.org/W1433129784","https://openalex.org/W1968653071","https://openalex.org/W2123157758","https://openalex.org/W3206858608","https://openalex.org/W2945129946"],"abstract_inverted_index":{"Abstract":[0],"We":[1,89,108],"study":[2,82],"reinforcement":[3,92],"learning":[4,93],"for":[5],"the":[6,51,58,62,83,87,106,116,119],"optimal":[7,98],"control":[8,99],"of":[9,18,26,33,35,53,57,76,86,101,111,118],"Branching":[10,20],"Markov":[11,21],"Decision":[12],"Processes":[13],"(BMDPs),":[14],"a":[15,27,31,44,54,74],"natural":[16],"extension":[17],"(multitype)":[19],"Chains":[22],"(BMCs).":[23],"The":[24],"state":[25],"(discrete-time)":[28],"BMCs":[29],"is":[30],"collection":[32],"entities":[34],"various":[36],"types":[37],"that,":[38],"while":[39],"spawning":[40],"other":[41],"entities,":[42],"generate":[43],"payoff.":[45],"In":[46],"comparison":[47],"with":[48],"BMCs,":[49],"where":[50],"evolution":[52],"each":[55],"entity":[56],"same":[59,63],"type":[60],"follows":[61],"probabilistic":[64],"pattern,":[65],"BMDPs":[66],"allow":[67],"an":[68,97,102,112],"external":[69],"controller":[70],"to":[71,81,95],"pick":[72],"from":[73],"range":[75],"options.":[77],"This":[78],"permits":[79],"us":[80],"best/worst":[84],"behaviour":[85],"system.":[88],"generalise":[90],"model-free":[91],"techniques":[94],"compute":[96],"strategy":[100],"unknown":[103],"BMDP":[104],"in":[105],"limit.":[107],"present":[109],"results":[110],"implementation":[113],"that":[114],"demonstrate":[115],"practicality":[117],"approach.":[120]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
