{"id":"https://openalex.org/W3161040011","doi":"https://doi.org/10.1007/s10994-020-05938-9","title":"Importance sampling in reinforcement learning with an estimated behavior policy","display_name":"Importance sampling in reinforcement learning with an estimated behavior policy","publication_year":2021,"publication_date":"2021-05-07","ids":{"openalex":"https://openalex.org/W3161040011","doi":"https://doi.org/10.1007/s10994-020-05938-9","mag":"3161040011"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-020-05938-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-020-05938-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-020-05938-9.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-020-05938-9.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008014974","display_name":"Josiah P. Hanna","orcid":"https://orcid.org/0000-0002-7411-0398"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Josiah P. Hanna","raw_affiliation_strings":["School of Informatics, University of Edinburgh, Edinburgh, EH8 9AB, UK"],"raw_orcid":"https://orcid.org/0000-0002-7411-0398","affiliations":[{"raw_affiliation_string":"School of Informatics, University of Edinburgh, Edinburgh, EH8 9AB, UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043572737","display_name":"Scott Niekum","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Scott Niekum","raw_affiliation_strings":["Department of Computer Science, University of Texas at Austin, Austin, TX, 78712-1757, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Texas at Austin, Austin, TX, 78712-1757, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001594330","display_name":"Peter Stone","orcid":"https://orcid.org/0000-0002-6795-420X"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peter Stone","raw_affiliation_strings":["Department of Computer Science, University of Texas at Austin, Austin, TX, 78712-1757, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Texas at Austin, Austin, TX, 78712-1757, USA","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5008014974"],"corresponding_institution_ids":["https://openalex.org/I98677209"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":2.6584,"has_fulltext":true,"cited_by_count":26,"citation_normalized_percentile":{"value":0.91473757,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"110","issue":"6","first_page":"1267","last_page":"1317"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9402999877929688,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10928","display_name":"Probabilistic and Robust Engineering Design","score":0.932699978351593,"subfield":{"id":"https://openalex.org/subfields/1804","display_name":"Statistics, Probability and Uncertainty"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.8607034683227539},{"id":"https://openalex.org/keywords/importance-sampling","display_name":"Importance sampling","score":0.7455658912658691},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.6779074668884277},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.6597128510475159},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.6339696645736694},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5944730639457703},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5534220933914185},{"id":"https://openalex.org/keywords/control-variates","display_name":"Control variates","score":0.45159363746643066},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.42148882150650024},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.38592779636383057},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.31107664108276367},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2582903802394867},{"id":"https://openalex.org/keywords/hybrid-monte-carlo","display_name":"Hybrid Monte Carlo","score":0.22283634543418884},{"id":"https://openalex.org/keywords/markov-chain-monte-carlo","display_name":"Markov chain Monte Carlo","score":0.19189155101776123}],"concepts":[{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.8607034683227539},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.7455658912658691},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.6779074668884277},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.6597128510475159},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.6339696645736694},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5944730639457703},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5534220933914185},{"id":"https://openalex.org/C121683094","wikidata":"https://www.wikidata.org/wiki/Q3554721","display_name":"Control variates","level":5,"score":0.45159363746643066},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.42148882150650024},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.38592779636383057},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.31107664108276367},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2582903802394867},{"id":"https://openalex.org/C13153151","wikidata":"https://www.wikidata.org/wiki/Q1639846","display_name":"Hybrid Monte Carlo","level":4,"score":0.22283634543418884},{"id":"https://openalex.org/C111350023","wikidata":"https://www.wikidata.org/wiki/Q1191869","display_name":"Markov chain Monte Carlo","level":3,"score":0.19189155101776123},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s10994-020-05938-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-020-05938-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-020-05938-9.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/02c0da3f-37da-4322-a84b-410b84b016df","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/02c0da3f-37da-4322-a84b-410b84b016df","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Hanna, J P, Niekum, S & Stone, P 2021, 'Importance sampling in reinforcement learning with an estimated behavior policy', Machine Learning, vol. 110, no. 6, pp. 1267-1317. https://doi.org/10.1007/s10994-020-05938-9","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1007/s10994-020-05938-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-020-05938-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-020-05938-9.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5899999737739563,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G1233958661","display_name":null,"funder_award_id":"IIS-1749204","funder_id":"https://openalex.org/F4320332169","funder_display_name":"Directorate for Computer and Information Science and Engineering"},{"id":"https://openalex.org/G2054556124","display_name":null,"funder_award_id":"IIS-1617639","funder_id":"https://openalex.org/F4320332169","funder_display_name":"Directorate for Computer and Information Science and Engineering"},{"id":"https://openalex.org/G2136314175","display_name":null,"funder_award_id":"W911NF-19-2-0333","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G3609089545","display_name":null,"funder_award_id":"N00014-18-2243","funder_id":"https://openalex.org/F4320310620","funder_display_name":"University of Texas at Austin"},{"id":"https://openalex.org/G3623752828","display_name":null,"funder_award_id":"IIS-1638107","funder_id":"https://openalex.org/F4320332169","funder_display_name":"Directorate for Computer and Information Science and Engineering"},{"id":"https://openalex.org/G3694786146","display_name":null,"funder_award_id":"CPS-1739964","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3881771821","display_name":null,"funder_award_id":"N00014-18-2243","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G3985699509","display_name":null,"funder_award_id":"CPS-1739964","funder_id":"https://openalex.org/F4320332169","funder_display_name":"Directorate for Computer and Information Science and Engineering"},{"id":"https://openalex.org/G4160581209","display_name":null,"funder_award_id":"NRI-1925082","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5029475413","display_name":null,"funder_award_id":"IIS-1724157","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5910453376","display_name":null,"funder_award_id":"IIS-1724157","funder_id":"https://openalex.org/F4320332169","funder_display_name":"Directorate for Computer and Information Science and Engineering"},{"id":"https://openalex.org/G5910919248","display_name":"CAREER: Safe and Efficient Robot Learning from Demonstration in the Real World","funder_award_id":"1749204","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G608057831","display_name":null,"funder_award_id":"W911NF-19","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G630961297","display_name":"NRI: FND: Improving Robot Learning from Feedback  and Demonstration using Natural Language","funder_award_id":"1925082","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6928197803","display_name":"NRI: Collaborative Research: Scalable Robot Autonomy through Remote Operator Assistance and Lifelong Learning","funder_award_id":"1638107","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7452299184","display_name":null,"funder_award_id":"W911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G760144772","display_name":null,"funder_award_id":"NRI-1925082","funder_id":"https://openalex.org/F4320332169","funder_display_name":"Directorate for Computer and Information Science and Engineering"},{"id":"https://openalex.org/G7861584084","display_name":null,"funder_award_id":"RFP2-000","funder_id":"https://openalex.org/F4320317463","funder_display_name":"Future of Life Institute"},{"id":"https://openalex.org/G8081156953","display_name":null,"funder_award_id":"1739964","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G829377574","display_name":null,"funder_award_id":"1724157","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8415477955","display_name":null,"funder_award_id":"W911NF-19","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G8828030238","display_name":null,"funder_award_id":"W911NF-19-2-0333","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G8876996369","display_name":null,"funder_award_id":"N00014","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306994","display_name":"Lockheed Martin","ror":"https://ror.org/026er9r08"},{"id":"https://openalex.org/F4320310620","display_name":"University of Texas at Austin","ror":"https://ror.org/00hj54h04"},{"id":"https://openalex.org/F4320313176","display_name":"Robert Bosch","ror":"https://ror.org/02venad53"},{"id":"https://openalex.org/F4320317463","display_name":"Future of Life Institute","ror":null},{"id":"https://openalex.org/F4320332169","display_name":"Directorate for Computer and Information Science and Engineering","ror":"https://ror.org/025kzpk63"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3161040011.pdf","grobid_xml":"https://content.openalex.org/works/W3161040011.grobid-xml"},"referenced_works_count":84,"referenced_works":["https://openalex.org/W191658262","https://openalex.org/W298976786","https://openalex.org/W1191599655","https://openalex.org/W1502364872","https://openalex.org/W1514587017","https://openalex.org/W1532325895","https://openalex.org/W1549353711","https://openalex.org/W1569296262","https://openalex.org/W1600437712","https://openalex.org/W1632550705","https://openalex.org/W1724455341","https://openalex.org/W1809653203","https://openalex.org/W1931027396","https://openalex.org/W2009618608","https://openalex.org/W2025533349","https://openalex.org/W2034806082","https://openalex.org/W2036193982","https://openalex.org/W2045569659","https://openalex.org/W2098152875","https://openalex.org/W2100752967","https://openalex.org/W2101915445","https://openalex.org/W2108682071","https://openalex.org/W2113913482","https://openalex.org/W2119567691","https://openalex.org/W2119717200","https://openalex.org/W2121506959","https://openalex.org/W2121863487","https://openalex.org/W2125612430","https://openalex.org/W2145339207","https://openalex.org/W2257979135","https://openalex.org/W2260756217","https://openalex.org/W2340679559","https://openalex.org/W2341171179","https://openalex.org/W2565313327","https://openalex.org/W2568411832","https://openalex.org/W2593237273","https://openalex.org/W2625967765","https://openalex.org/W2753511062","https://openalex.org/W2785973002","https://openalex.org/W2810033056","https://openalex.org/W2882329280","https://openalex.org/W2886249230","https://openalex.org/W2890022552","https://openalex.org/W2890951405","https://openalex.org/W2904789544","https://openalex.org/W2932300198","https://openalex.org/W2949608212","https://openalex.org/W2952385510","https://openalex.org/W2952815050","https://openalex.org/W2962785510","https://openalex.org/W2962802563","https://openalex.org/W2963184621","https://openalex.org/W2963748089","https://openalex.org/W2964016927","https://openalex.org/W2964043796","https://openalex.org/W2964121744","https://openalex.org/W2964225533","https://openalex.org/W2964337555","https://openalex.org/W2994611444","https://openalex.org/W3013721131","https://openalex.org/W3035536593","https://openalex.org/W3041339286","https://openalex.org/W3049337243","https://openalex.org/W3105983955","https://openalex.org/W3122193054","https://openalex.org/W3139377883","https://openalex.org/W3155307338","https://openalex.org/W4206038828","https://openalex.org/W4213009331","https://openalex.org/W4214717370","https://openalex.org/W4237591687","https://openalex.org/W4244387609","https://openalex.org/W4310895557","https://openalex.org/W6607764300","https://openalex.org/W6610666704","https://openalex.org/W6631190155","https://openalex.org/W6635902665","https://openalex.org/W6638018090","https://openalex.org/W6640490175","https://openalex.org/W6677916085","https://openalex.org/W6677916933","https://openalex.org/W6728731650","https://openalex.org/W6734043786","https://openalex.org/W7001894244"],"related_works":["https://openalex.org/W2469237301","https://openalex.org/W4287557330","https://openalex.org/W3111562146","https://openalex.org/W3122602933","https://openalex.org/W2950038056","https://openalex.org/W1544940847","https://openalex.org/W4231271795","https://openalex.org/W2289285490","https://openalex.org/W160467666","https://openalex.org/W1997242758"],"abstract_inverted_index":{"Abstract":[0],"In":[1,57],"reinforcement":[2],"learning,":[3],"importance":[4,62],"sampling":[5,36,63,93,131],"is":[6],"a":[7,32,47],"widely":[8],"used":[9],"method":[10],"for":[11,137],"evaluating":[12],"an":[13],"expectation":[14],"under":[15,80],"the":[16,24,39,43,53,65,81,116,126],"distribution":[17],"of":[18,20,46,52,77,128],"data":[19,25],"one":[21],"policy":[22,49,67,138,145],"when":[23],"has":[26],"in":[27,95,115],"fact":[28],"been":[29],"generated":[30],"by":[31,72],"different":[33],"policy.":[34,56],"Importance":[35],"requires":[37],"computing":[38],"likelihood":[40,75],"ratio":[41],"between":[42],"action":[44,68],"probabilities":[45,69,79],"target":[48],"and":[50,141,159],"those":[51],"data-producing":[54],"behavior":[55,66],"this":[58,86,107],"article,":[59],"we":[60],"study":[61],"where":[64],"are":[70,157],"replaced":[71],"their":[73],"maximum":[74],"estimate":[76,110],"these":[78,122],"observed":[82],"data.":[83],"We":[84,100,119,147],"show":[85],"general":[87,123],"technique":[88,108],"reduces":[89],"variance":[90,127,163],"due":[91],"to":[92,109,134],"error":[94],"Monte":[96,129,165],"Carlo":[97,130,166],"style":[98],"estimators.":[99,167],"introduce":[101],"two":[102],"novel":[103],"estimators":[104,124,156],"that":[105,113,121,153],"use":[106],"expected":[111],"values":[112],"arise":[114],"RL":[117],"literature.":[118],"find":[120],"reduce":[125],"methods,":[132],"leading":[133],"faster":[135],"learning":[136],"gradient":[139],"algorithms":[140],"more":[142],"accurate":[143],"off-policy":[144],"evaluation.":[146],"also":[148],"provide":[149],"theoretical":[150],"analysis":[151],"showing":[152],"our":[154],"new":[155],"consistent":[158],"have":[160],"asymptotically":[161],"lower":[162],"than":[164]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
