{"id":"https://openalex.org/W3124408148","doi":"https://doi.org/10.1109/tit.2022.3162335","title":"Minimax Off-Policy Evaluation for Multi-Armed Bandits","display_name":"Minimax Off-Policy Evaluation for Multi-Armed Bandits","publication_year":2022,"publication_date":"2022-03-25","ids":{"openalex":"https://openalex.org/W3124408148","doi":"https://doi.org/10.1109/tit.2022.3162335","mag":"3124408148"},"language":"en","primary_location":{"id":"doi:10.1109/tit.2022.3162335","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2022.3162335","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074490643","display_name":"Cong Ma","orcid":"https://orcid.org/0000-0003-2532-0038"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Cong Ma","raw_affiliation_strings":["Department of Statistics, The University of Chicago, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Statistics, The University of Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031613608","display_name":"Banghua Zhu","orcid":"https://orcid.org/0000-0002-7320-3533"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Banghua Zhu","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034192173","display_name":"Jiantao Jiao","orcid":"https://orcid.org/0000-0003-3766-8031"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiantao Jiao","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences and the Department of Statistics, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences and the Department of Statistics, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038379562","display_name":"Martin J. Wainwright","orcid":"https://orcid.org/0000-0002-8760-2236"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Martin J. Wainwright","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences and the Department of Statistics, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences and the Department of Statistics, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5074490643"],"corresponding_institution_ids":["https://openalex.org/I40347166"],"apc_list":null,"apc_paid":null,"fwci":0.4911,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.64688497,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"68","issue":"8","first_page":"5314","last_page":"5339"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.804836630821228},{"id":"https://openalex.org/keywords/minimax","display_name":"Minimax","score":0.7752032279968262},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5796664953231812},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.525688648223877},{"id":"https://openalex.org/keywords/mean-squared-error","display_name":"Mean squared error","score":0.5057370662689209},{"id":"https://openalex.org/keywords/minimax-estimator","display_name":"Minimax estimator","score":0.48309850692749023},{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.4487275183200836},{"id":"https://openalex.org/keywords/logarithm","display_name":"Logarithm","score":0.4403132200241089},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4008393883705139},{"id":"https://openalex.org/keywords/minimum-variance-unbiased-estimator","display_name":"Minimum-variance unbiased estimator","score":0.333523154258728},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.24664071202278137}],"concepts":[{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.804836630821228},{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.7752032279968262},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5796664953231812},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.525688648223877},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.5057370662689209},{"id":"https://openalex.org/C133939421","wikidata":"https://www.wikidata.org/wiki/Q6865379","display_name":"Minimax estimator","level":4,"score":0.48309850692749023},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.4487275183200836},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.4403132200241089},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4008393883705139},{"id":"https://openalex.org/C165646398","wikidata":"https://www.wikidata.org/wiki/Q3755281","display_name":"Minimum-variance unbiased estimator","level":3,"score":0.333523154258728},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.24664071202278137},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tit.2022.3162335","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2022.3162335","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2520373344","display_name":null,"funder_award_id":"DOD-ONR-N00014-18-1-2640","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G2571710149","display_name":null,"funder_award_id":"DMS-2015454","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W191658262","https://openalex.org/W1483255752","https://openalex.org/W1512638263","https://openalex.org/W1538444754","https://openalex.org/W1989151402","https://openalex.org/W2001947543","https://openalex.org/W2028426770","https://openalex.org/W2030562249","https://openalex.org/W2073384958","https://openalex.org/W2077902449","https://openalex.org/W2078729747","https://openalex.org/W2086206379","https://openalex.org/W2121863487","https://openalex.org/W2122124659","https://openalex.org/W2127090196","https://openalex.org/W2138909795","https://openalex.org/W2219888463","https://openalex.org/W2275802500","https://openalex.org/W2561586961","https://openalex.org/W2734936460","https://openalex.org/W2762763087","https://openalex.org/W2912099989","https://openalex.org/W2951142340","https://openalex.org/W2962802563","https://openalex.org/W2963017284","https://openalex.org/W2963323139","https://openalex.org/W2963389257","https://openalex.org/W2963608890","https://openalex.org/W2964068481","https://openalex.org/W2990216309","https://openalex.org/W3034541690","https://openalex.org/W3037792667","https://openalex.org/W3043057128","https://openalex.org/W3101382006","https://openalex.org/W3107128594","https://openalex.org/W3122193054","https://openalex.org/W4206530644","https://openalex.org/W4211030719","https://openalex.org/W4211221179","https://openalex.org/W4214717370","https://openalex.org/W4233471163","https://openalex.org/W4287714048","https://openalex.org/W4297575875","https://openalex.org/W6607764300","https://openalex.org/W6632302721","https://openalex.org/W6678276431","https://openalex.org/W6689664320","https://openalex.org/W6694521564","https://openalex.org/W6704084210","https://openalex.org/W6731062297","https://openalex.org/W6762969092","https://openalex.org/W6773400333","https://openalex.org/W6774823833"],"related_works":["https://openalex.org/W3174947331","https://openalex.org/W2963760573","https://openalex.org/W237619808","https://openalex.org/W2104713681","https://openalex.org/W2895916002","https://openalex.org/W2045096965","https://openalex.org/W2356451205","https://openalex.org/W35332197","https://openalex.org/W2072918764","https://openalex.org/W2051240917"],"abstract_inverted_index":{"We":[0,165],"study":[1,143],"the":[2,8,25,33,41,56,67,76,85,98,103,106,115,119,126,142,145,155,160,168,178,185,210],"problem":[3],"of":[4,66,78,105,118,144,177],"off-policy":[5],"evaluation":[6],"in":[7,64,149],"multi-armed":[9],"bandit":[10],"model":[11],"with":[12,102],"bounded":[13],"rewards,":[14],"and":[15,43,80,219],"develop":[16],"minimax":[17,48],"rate-optimal":[18,49],"procedures":[19],"under":[20],"three":[21],"settings.":[22],"First,":[23],"when":[24,55,184],"behavior":[26,57,82,86,107,161],"policy":[27,58,87,162],"is":[28,47,59,88,152,163,171,182,188],"known,":[29],"we":[30,61,123,140,196],"show":[31,166],"that":[32,38,125,154,167,207],"Switch":[34],"estimator,":[35],"a":[36,72,110,136,198],"method":[37],"alternates":[39],"between":[40,75],"plug-in":[42,127,169],"importance":[44],"sampling":[45],"estimators,":[46],"for":[50,173],"all":[51],"sample":[52],"sizes.":[53],"Second,":[54],"unknown,":[60,89],"analyze":[62],"performance":[63],"terms":[65],"competitive":[68,132],"ratio,":[69],"thereby":[70],"revealing":[71],"fundamental":[73],"gap":[74],"settings":[77],"known":[79],"unknown":[81],"policies.":[83],"When":[84],"any":[90],"estimator":[91,100,170,200],"must":[92],"have":[93],"mean-squared":[94],"error":[95],"larger\u2014relative":[96],"to":[97,114,135,192],"oracle":[99],"equipped":[101],"knowledge":[104,147],"policy\u2014":[108],"by":[109,159,204],"multiplicative":[111],"factor":[112],"proportional":[113],"support":[116],"size":[117],"target":[120],"policy.":[121],"Moreover,":[122],"demonstrate":[124],"approach":[128],"achieves":[129,209],"this":[130,194],"worst-case":[131],"ratio":[133],"up":[134],"logarithmic":[137],"factor.":[138],"Third,":[139],"initiate":[141],"partial":[146],"setting":[148],"which":[150],"it":[151],"assumed":[153],"minimum":[156,179,186],"probability":[157,187],"taken":[158],"known.":[164],"optimal":[172,211],"relatively":[174],"large":[175],"values":[176],"probability,":[180],"but":[181],"sub-optimal":[183],"low.":[189],"In":[190],"order":[191],"remedy":[193],"gap,":[195],"propose":[197],"new":[199],"based":[201],"on":[202,216],"approximation":[203],"Chebyshev":[205],"polynomials":[206],"provably":[208],"estimation":[212],"error.":[213],"Numerical":[214],"experiments":[215],"both":[217],"simulated":[218],"real":[220],"data":[221],"corroborate":[222],"our":[223],"theoretical":[224],"findings.":[225]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
