{"id":"https://openalex.org/W4387389960","doi":"https://doi.org/10.48550/arxiv.2310.02581","title":"Online Estimation and Inference for Robust Policy Evaluation in Reinforcement Learning","display_name":"Online Estimation and Inference for Robust Policy Evaluation in Reinforcement Learning","publication_year":2023,"publication_date":"2023-10-04","ids":{"openalex":"https://openalex.org/W4387389960","doi":"https://doi.org/10.48550/arxiv.2310.02581"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2310.02581","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.02581","pdf_url":"https://arxiv.org/pdf/2310.02581","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2310.02581","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100687149","display_name":"Weidong Liu","orcid":"https://orcid.org/0000-0002-5449-9180"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Weidong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088466985","display_name":"Jiyuan Tu","orcid":"https://orcid.org/0000-0002-1264-4837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tu, Jiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100329931","display_name":"Xi Chen","orcid":"https://orcid.org/0000-0002-3135-4114"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100341419","display_name":"Yichen Zhang","orcid":"https://orcid.org/0000-0002-6925-0775"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yichen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100687149"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12879","display_name":"Distributed Sensor Networks and Detection Algorithms","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12879","display_name":"Distributed Sensor Networks and Detection Algorithms","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9656000137329102,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9629999995231628,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8417495489120483},{"id":"https://openalex.org/keywords/statistical-inference","display_name":"Statistical inference","score":0.6964734792709351},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6669856309890747},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6651607751846313},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.6467885971069336},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.6300327181816101},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6186125874519348},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.55315101146698},{"id":"https://openalex.org/keywords/asymptotic-distribution","display_name":"Asymptotic distribution","score":0.44898468255996704},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.44768643379211426},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.21610233187675476},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18305134773254395}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8417495489120483},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.6964734792709351},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6669856309890747},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6651607751846313},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.6467885971069336},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.6300327181816101},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6186125874519348},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.55315101146698},{"id":"https://openalex.org/C65778772","wikidata":"https://www.wikidata.org/wiki/Q12345341","display_name":"Asymptotic distribution","level":3,"score":0.44898468255996704},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.44768643379211426},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.21610233187675476},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18305134773254395},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2310.02581","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.02581","pdf_url":"https://arxiv.org/pdf/2310.02581","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2310.02581","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2310.02581","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2310.02581","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.02581","pdf_url":"https://arxiv.org/pdf/2310.02581","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4000000059604645}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387389960.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W3006513224","https://openalex.org/W137830373","https://openalex.org/W3000984192","https://openalex.org/W2103073163","https://openalex.org/W4286952477","https://openalex.org/W4321348134","https://openalex.org/W4387929287","https://openalex.org/W3121677332","https://openalex.org/W2063040110"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,27,46,68,158],"has":[2],"emerged":[3],"as":[4],"one":[5],"of":[6,44,63,73,100,146],"the":[7,24,38,61,97,116,144],"prominent":[8],"topics":[9],"attracting":[10],"attention":[11],"in":[12,66,127,153],"modern":[13],"statistical":[14,35,112,125],"learning,":[15,129],"with":[16],"policy":[17,92,139],"evaluation":[18,93],"being":[19],"a":[20,80,88,131],"key":[21],"component.":[22],"Unlike":[23],"traditional":[25],"machine":[26],"literature":[28],"on":[29,115],"this":[30,84],"topic,":[31],"our":[32,101,147],"work":[33],"emphasizes":[34],"inference":[36,113,126],"for":[37],"model":[39],"parameters":[40],"and":[41,76,95,124,134,155],"value":[42],"functions":[43],"reinforcement":[45,67,128,157],"algorithms.":[47],"While":[48],"most":[49],"existing":[50],"analyses":[51],"assume":[52],"random":[53],"rewards":[54,78],"to":[55,109,137],"follow":[56],"standard":[57],"distributions,":[58],"we":[59,86,104,142],"embrace":[60],"concept":[62],"robust":[64,91,122],"statistics":[65,123],"by":[69],"simultaneously":[70],"addressing":[71],"issues":[72],"outlier":[74],"contamination":[75],"heavy-tailed":[77],"within":[79],"unified":[81],"framework.":[82],"In":[83],"paper,":[85],"develop":[87,105],"fully":[89],"online":[90,107,138],"procedure,":[94],"establish":[96],"Bahadur-type":[98],"representation":[99],"estimator.":[102],"Furthermore,":[103],"an":[106],"procedure":[108],"efficiently":[110],"conduct":[111],"based":[114],"asymptotic":[117],"distribution.":[118],"This":[119],"paper":[120],"connects":[121],"offering":[130],"more":[132],"versatile":[133],"reliable":[135],"approach":[136],"evaluation.":[140],"Finally,":[141],"validate":[143],"efficacy":[145],"algorithm":[148],"through":[149],"numerical":[150],"experiments":[151],"conducted":[152],"simulations":[154],"real-world":[156],"experiments.":[159]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2025-10-10T00:00:00"}
