{"id":"https://openalex.org/W2120678009","doi":"https://doi.org/10.1007/s10994-013-5368-1","title":"Minimax PAC bounds on the sample complexity of reinforcement learning with a generative model","display_name":"Minimax PAC bounds on the sample complexity of reinforcement learning with a generative model","publication_year":2013,"publication_date":"2013-05-13","ids":{"openalex":"https://openalex.org/W2120678009","doi":"https://doi.org/10.1007/s10994-013-5368-1","mag":"2120678009"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-013-5368-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-013-5368-1","pdf_url":"https://link.springer.com/content/pdf/10.1007%2Fs10994-013-5368-1.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://link.springer.com/content/pdf/10.1007%2Fs10994-013-5368-1.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043355670","display_name":"Mohammad Gheshlaghi Azar","orcid":null},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Mohammad Gheshlaghi Azar","raw_affiliation_strings":["Department of Biophysics, Radboud University Nijmegen, 6525, EZ Nijmegen, The Netherlands","Department of Biophysics, Radboud University Nijmegen, EZ Nijmegen, The Netherlands 6525 and School of Computer Science, Carnegie Mellon University, Pittsburgh, USA 15213#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Biophysics, Radboud University Nijmegen, 6525, EZ Nijmegen, The Netherlands","institution_ids":["https://openalex.org/I145872427"]},{"raw_affiliation_string":"Department of Biophysics, Radboud University Nijmegen, EZ Nijmegen, The Netherlands 6525 and School of Computer Science, Carnegie Mellon University, Pittsburgh, USA 15213#TAB#","institution_ids":["https://openalex.org/I145872427"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006533777","display_name":"R\u00e9mi Munos","orcid":null},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I4210138412","display_name":"Centre Inria de l'Universit\u00e9 de Lille","ror":"https://ror.org/04eej9726","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210138412"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"R\u00e9mi Munos","raw_affiliation_strings":["INRIA Lille, SequeL Project, 40 avenue Halley, 59650, Villeneuve d\u2019Ascq, France","INRIA Lille, SequeL Project, Villeneuve d'Ascq, France 59650#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"INRIA Lille, SequeL Project, 40 avenue Halley, 59650, Villeneuve d\u2019Ascq, France","institution_ids":["https://openalex.org/I4210138412"]},{"raw_affiliation_string":"INRIA Lille, SequeL Project, Villeneuve d'Ascq, France 59650#TAB#","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050819882","display_name":"Hilbert J. Kappen","orcid":"https://orcid.org/0000-0002-5728-3676"},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Hilbert J. Kappen","raw_affiliation_strings":["Department of Biophysics, Radboud University Nijmegen, 6525, EZ Nijmegen, The Netherlands","Department of Biophysics, Radboud University Nijmegen, EZ Nijmegen, The Netherlands 6525#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Biophysics, Radboud University Nijmegen, 6525, EZ Nijmegen, The Netherlands","institution_ids":["https://openalex.org/I145872427"]},{"raw_affiliation_string":"Department of Biophysics, Radboud University Nijmegen, EZ Nijmegen, The Netherlands 6525#TAB#","institution_ids":["https://openalex.org/I145872427"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":null,"fwci":0.9883,"has_fulltext":true,"cited_by_count":159,"citation_normalized_percentile":{"value":0.83354039,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"91","issue":"3","first_page":"325","last_page":"349"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9629999995231628,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8065630197525024},{"id":"https://openalex.org/keywords/sample-complexity","display_name":"Sample complexity","score":0.7753376364707947},{"id":"https://openalex.org/keywords/minimax","display_name":"Minimax","score":0.7670086622238159},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7478051781654358},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.731660783290863},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.6909962296485901},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.6049531102180481},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5070836544036865},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.49857044219970703},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4718846082687378},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4515319764614105},{"id":"https://openalex.org/keywords/probably-approximately-correct-learning","display_name":"Probably approximately correct learning","score":0.42815378308296204},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.41892209649086},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4058069586753845},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.40136560797691345},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.3992643654346466},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24881145358085632},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.24526220560073853},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.234796404838562},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.17294010519981384},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.14501720666885376},{"id":"https://openalex.org/keywords/computational-learning-theory","display_name":"Computational learning theory","score":0.09626242518424988},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.06853187084197998}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8065630197525024},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.7753376364707947},{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.7670086622238159},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7478051781654358},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.731660783290863},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6909962296485901},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.6049531102180481},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5070836544036865},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.49857044219970703},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4718846082687378},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4515319764614105},{"id":"https://openalex.org/C176248197","wikidata":"https://www.wikidata.org/wiki/Q458526","display_name":"Probably approximately correct learning","level":4,"score":0.42815378308296204},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.41892209649086},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4058069586753845},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.40136560797691345},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3992643654346466},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24881145358085632},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.24526220560073853},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.234796404838562},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.17294010519981384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14501720666885376},{"id":"https://openalex.org/C50292564","wikidata":"https://www.wikidata.org/wiki/Q2462783","display_name":"Computational learning theory","level":3,"score":0.09626242518424988},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.06853187084197998},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1007/s10994-013-5368-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-013-5368-1","pdf_url":"https://link.springer.com/content/pdf/10.1007%2Fs10994-013-5368-1.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-00831875v1","is_oa":true,"landing_page_url":"https://hal.science/hal-00831875","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning, 2013, 91 (3), pp.325-349. &#x27E8;10.1007/s10994-013-5368-1&#x27E9;","raw_type":"Journal articles"},{"id":"pmh:oai:lilloa.univ-lille.fr:20.500.12210/26534","is_oa":true,"landing_page_url":"http://hdl.handle.net/20.500.12210/26534","pdf_url":null,"source":{"id":"https://openalex.org/S4306402203","display_name":"LillOA (Universit\u00e9 de Lille (University Of Lille))","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210123514","host_organization_name":"Centre d'Etudes en Civilisations, Langues et Litt\u00e9ratures Etrang\u00e8res","host_organization_lineage":["https://openalex.org/I4210123514"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:ru:oai:repository.ubn.ru.nl:2066/111191","is_oa":true,"landing_page_url":"http://hdl.handle.net/2066/111191","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning, 91, 325 - 349","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1007/s10994-013-5368-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-013-5368-1","pdf_url":"https://link.springer.com/content/pdf/10.1007%2Fs10994-013-5368-1.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.75}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2120678009.pdf","grobid_xml":"https://content.openalex.org/works/W2120678009.grobid-xml"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W21934178","https://openalex.org/W107583932","https://openalex.org/W137538757","https://openalex.org/W178169250","https://openalex.org/W1515851193","https://openalex.org/W1526654727","https://openalex.org/W1570963478","https://openalex.org/W1576452626","https://openalex.org/W1662803991","https://openalex.org/W1850488217","https://openalex.org/W1867103660","https://openalex.org/W1953057174","https://openalex.org/W2013614847","https://openalex.org/W2039439610","https://openalex.org/W2057399436","https://openalex.org/W2073384958","https://openalex.org/W2098432798","https://openalex.org/W2101786389","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2122701159","https://openalex.org/W2123447947","https://openalex.org/W2132876566","https://openalex.org/W2147967768","https://openalex.org/W2149166950","https://openalex.org/W2168723691","https://openalex.org/W2313791856","https://openalex.org/W2334782222","https://openalex.org/W2492794003","https://openalex.org/W2529217187","https://openalex.org/W2530230564","https://openalex.org/W2559997609","https://openalex.org/W2604272474","https://openalex.org/W4214717370","https://openalex.org/W4243421009","https://openalex.org/W4299401133","https://openalex.org/W4307347247","https://openalex.org/W6604362843","https://openalex.org/W6634528131","https://openalex.org/W6677916085"],"related_works":["https://openalex.org/W1885033443","https://openalex.org/W2990709181","https://openalex.org/W2043360510","https://openalex.org/W2964179321","https://openalex.org/W2999552052","https://openalex.org/W572997848","https://openalex.org/W4388650605","https://openalex.org/W1499340026","https://openalex.org/W1486784581","https://openalex.org/W2074950806"],"abstract_inverted_index":null,"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":47},{"year":2020,"cited_by_count":40},{"year":2019,"cited_by_count":16},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
