{"id":"https://openalex.org/W3138939485","doi":"https://doi.org/10.1109/tit.2022.3185139","title":"Bridging Offline Reinforcement Learning and Imitation Learning: A Tale of Pessimism","display_name":"Bridging Offline Reinforcement Learning and Imitation Learning: A Tale of Pessimism","publication_year":2022,"publication_date":"2022-06-22","ids":{"openalex":"https://openalex.org/W3138939485","doi":"https://doi.org/10.1109/tit.2022.3185139","mag":"3138939485"},"language":"en","primary_location":{"id":"doi:10.1109/tit.2022.3185139","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2022.3185139","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016597745","display_name":"Paria Rashidinejad","orcid":"https://orcid.org/0000-0002-8881-8346"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Paria Rashidinejad","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031613608","display_name":"Banghua Zhu","orcid":"https://orcid.org/0000-0002-7320-3533"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Banghua Zhu","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074490643","display_name":"Cong Ma","orcid":"https://orcid.org/0000-0003-2532-0038"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cong Ma","raw_affiliation_strings":["Department of Statistics, The University of Chicago, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Statistics, The University of Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034192173","display_name":"Jiantao Jiao","orcid":"https://orcid.org/0000-0003-3766-8031"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiantao Jiao","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences and the Department of Statistics, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences and the Department of Statistics, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007305440","display_name":"Stuart Russell","orcid":"https://orcid.org/0000-0001-5252-4306"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stuart Russell","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5016597745"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":5.1049,"has_fulltext":false,"cited_by_count":54,"citation_normalized_percentile":{"value":0.95827075,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"68","issue":"12","first_page":"8156","last_page":"8196"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8224009871482849},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7233525514602661},{"id":"https://openalex.org/keywords/offline-learning","display_name":"Offline learning","score":0.6090601086616516},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.5996556878089905},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5776996612548828},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5023584365844727},{"id":"https://openalex.org/keywords/minimax","display_name":"Minimax","score":0.43802914023399353},{"id":"https://openalex.org/keywords/sample-complexity","display_name":"Sample complexity","score":0.4125634431838989},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.23470288515090942},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.1940290331840515},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.17849871516227722},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16970273852348328},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.08472606539726257}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8224009871482849},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7233525514602661},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.6090601086616516},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5996556878089905},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5776996612548828},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5023584365844727},{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.43802914023399353},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.4125634431838989},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.23470288515090942},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.1940290331840515},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.17849871516227722},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16970273852348328},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.08472606539726257},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tit.2022.3185139","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2022.3185139","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G3147760301","display_name":null,"funder_award_id":"CCF-1909499","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5689959267","display_name":null,"funder_award_id":"DMS-2023505","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G645253323","display_name":null,"funder_award_id":"IIS-1901252","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320319993","display_name":"Leverhulme Trust","ror":"https://ror.org/012mzw131"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":182,"referenced_works":["https://openalex.org/W192920577","https://openalex.org/W1535258871","https://openalex.org/W1538452572","https://openalex.org/W1575592356","https://openalex.org/W1730555343","https://openalex.org/W1757796397","https://openalex.org/W1867103660","https://openalex.org/W1915973093","https://openalex.org/W2012547817","https://openalex.org/W2014944833","https://openalex.org/W2073384958","https://openalex.org/W2104753538","https://openalex.org/W2108794978","https://openalex.org/W2113065326","https://openalex.org/W2119579400","https://openalex.org/W2120678009","https://openalex.org/W2122689259","https://openalex.org/W2128812357","https://openalex.org/W2142641780","https://openalex.org/W2145339207","https://openalex.org/W2151416233","https://openalex.org/W2163162311","https://openalex.org/W2209913494","https://openalex.org/W2257979135","https://openalex.org/W2334782222","https://openalex.org/W2342840547","https://openalex.org/W2518306676","https://openalex.org/W2545659366","https://openalex.org/W2605323209","https://openalex.org/W2766447205","https://openalex.org/W2805861379","https://openalex.org/W2806163172","https://openalex.org/W2904453761","https://openalex.org/W2904730732","https://openalex.org/W2942709080","https://openalex.org/W2946911039","https://openalex.org/W2947150733","https://openalex.org/W2951295612","https://openalex.org/W2951403958","https://openalex.org/W2952295663","https://openalex.org/W2953303875","https://openalex.org/W2953981431","https://openalex.org/W2962785728","https://openalex.org/W2963044034","https://openalex.org/W2963049774","https://openalex.org/W2963325394","https://openalex.org/W2963561234","https://openalex.org/W2963704132","https://openalex.org/W2963872309","https://openalex.org/W2963971282","https://openalex.org/W2964299116","https://openalex.org/W2970789256","https://openalex.org/W2971026276","https://openalex.org/W2971262355","https://openalex.org/W2971587637","https://openalex.org/W2978455699","https://openalex.org/W2979211489","https://openalex.org/W2990006450","https://openalex.org/W2991355586","https://openalex.org/W2993185773","https://openalex.org/W2994798986","https://openalex.org/W2995041734","https://openalex.org/W2995638039","https://openalex.org/W2995706821","https://openalex.org/W3000543388","https://openalex.org/W3009584650","https://openalex.org/W3014860839","https://openalex.org/W3016525976","https://openalex.org/W3022566517","https://openalex.org/W3025606523","https://openalex.org/W3028766998","https://openalex.org/W3029338183","https://openalex.org/W3033324992","https://openalex.org/W3034525128","https://openalex.org/W3034541690","https://openalex.org/W3034607397","https://openalex.org/W3035634530","https://openalex.org/W3037024314","https://openalex.org/W3037440645","https://openalex.org/W3038915804","https://openalex.org/W3042380218","https://openalex.org/W3045247449","https://openalex.org/W3046626913","https://openalex.org/W3046692137","https://openalex.org/W3048911479","https://openalex.org/W3086417465","https://openalex.org/W3091897465","https://openalex.org/W3093206925","https://openalex.org/W3093887912","https://openalex.org/W3094261094","https://openalex.org/W3099577521","https://openalex.org/W3101023256","https://openalex.org/W3102354071","https://openalex.org/W3103383763","https://openalex.org/W3104954646","https://openalex.org/W3112486745","https://openalex.org/W3113240575","https://openalex.org/W3114509039","https://openalex.org/W3124408148","https://openalex.org/W3127596788","https://openalex.org/W3128383903","https://openalex.org/W3130177876","https://openalex.org/W3131920644","https://openalex.org/W3148530143","https://openalex.org/W3159738529","https://openalex.org/W3166645952","https://openalex.org/W3167624337","https://openalex.org/W3169514089","https://openalex.org/W3201700917","https://openalex.org/W3212343910","https://openalex.org/W4211221179","https://openalex.org/W4256155303","https://openalex.org/W4287553001","https://openalex.org/W4287667652","https://openalex.org/W4288094554","https://openalex.org/W4294407760","https://openalex.org/W4298857966","https://openalex.org/W6634413486","https://openalex.org/W6637597983","https://openalex.org/W6637967152","https://openalex.org/W6639926649","https://openalex.org/W6677102192","https://openalex.org/W6678030633","https://openalex.org/W6679100793","https://openalex.org/W6680724558","https://openalex.org/W6682074005","https://openalex.org/W6704559304","https://openalex.org/W6729089391","https://openalex.org/W6729229454","https://openalex.org/W6743605167","https://openalex.org/W6745096475","https://openalex.org/W6752725515","https://openalex.org/W6757469721","https://openalex.org/W6761973403","https://openalex.org/W6762719077","https://openalex.org/W6763118400","https://openalex.org/W6763704811","https://openalex.org/W6764366976","https://openalex.org/W6764976746","https://openalex.org/W6765767185","https://openalex.org/W6766497270","https://openalex.org/W6767941141","https://openalex.org/W6768617876","https://openalex.org/W6768843968","https://openalex.org/W6769666240","https://openalex.org/W6770585564","https://openalex.org/W6771270455","https://openalex.org/W6771428195","https://openalex.org/W6771886500","https://openalex.org/W6772092215","https://openalex.org/W6772925031","https://openalex.org/W6773034082","https://openalex.org/W6774583691","https://openalex.org/W6774823833","https://openalex.org/W6776438516","https://openalex.org/W6776530296","https://openalex.org/W6776601253","https://openalex.org/W6777091672","https://openalex.org/W6777656069","https://openalex.org/W6777828934","https://openalex.org/W6779265984","https://openalex.org/W6780159193","https://openalex.org/W6780386840","https://openalex.org/W6780964083","https://openalex.org/W6781389245","https://openalex.org/W6781960742","https://openalex.org/W6783671239","https://openalex.org/W6784114448","https://openalex.org/W6784678651","https://openalex.org/W6784748513","https://openalex.org/W6784750246","https://openalex.org/W6784986766","https://openalex.org/W6785329447","https://openalex.org/W6785423136","https://openalex.org/W6786103492","https://openalex.org/W6786798588","https://openalex.org/W6787043984","https://openalex.org/W6787262146","https://openalex.org/W6789617958","https://openalex.org/W6790014184","https://openalex.org/W6791413555","https://openalex.org/W6795063903"],"related_works":["https://openalex.org/W4400868993","https://openalex.org/W3096874164","https://openalex.org/W1985560493","https://openalex.org/W2386410636","https://openalex.org/W2357975469","https://openalex.org/W2145363145","https://openalex.org/W1626977535","https://openalex.org/W1970303738","https://openalex.org/W2341346307","https://openalex.org/W3168977894"],"abstract_inverted_index":{"Offline":[0],"reinforcement":[1],"learning":[2,33,96,232],"(RL)":[3],"algorithms":[4],"seek":[5],"to":[6,119,194,202,233],"learn":[7],"an":[8,110],"optimal":[9,116,219,244],"policy":[10],"from":[11,57,230],"a":[12,51,75,114,129,186,227],"fixed":[13],"dataset":[14],"without":[15],"active":[16],"data":[17,64,91,121,223],"collection.":[18],"Based":[19],"on":[20,137],"the":[21,24,62,87,140,195,203,221],"composition":[22,65,224],"of":[23,90,142,151],"offline":[25,42,77,99,145,208,234],"dataset,":[26],"two":[27,59,88],"main":[28],"methods":[29],"are":[30],"used:":[31],"imitation":[32,95,199,231],"which":[34,44],"is":[35,66,217,241],"suitable":[36],"for":[37,190,220],"expert":[38],"datasets,":[39,192],"and":[40,61,97,163,182],"vanilla":[41,98],"RL":[43,78],"often":[45,55],"requires":[46],"uniform":[47],"coverage":[48],"datasets.":[49],"From":[50],"practical":[52],"standpoint,":[53],"datasets":[54],"deviate":[56],"these":[58],"extremes":[60,89],"exact":[63],"usually":[67],"unknown.":[68],"To":[69,123],"bridge":[70],"this":[71,102,125],"gap,":[72],"we":[73,105,127,213],"present":[74],"new":[76,103],"framework,":[79,104],"called":[80],"single-policy":[81],"concentrability,":[82],"that":[83,112,215,239],"smoothly":[84],"interpolates":[85],"between":[86],"composition,":[92],"hence":[93],"unifying":[94],"RL.":[100,146,209,235],"Under":[101],"ask:":[106],"can":[107],"one":[108,196],"develop":[109],"algorithm":[111,134],"achieves":[113,185],"minimax":[115],"rate":[117,189,205],"adaptive":[118],"unknown":[120],"composition?":[122],"address":[124],"question,":[126],"consider":[128],"lower":[130],"confidence":[131],"bound":[132],"(LCB)":[133],"developed":[135],"based":[136],"pessimism":[138],"in":[139,144,158,178,207,245],"face":[141],"uncertainty":[143],"We":[147,236],"study":[148],"finite-sample":[149],"properties":[150],"LCB":[152,184,216,240],"as":[153,155],"well":[154],"information-theoretic":[156],"limits":[157],"multi-armed":[159],"bandits,":[160,162,212],"contextual":[161,180,211],"Markov":[164],"decision":[165],"processes":[166],"(MDPs).":[167],"Our":[168],"analysis":[169],"reveals":[170],"surprising":[171],"facts":[172],"about":[173],"optimality":[174],"rates.":[175],"In":[176,210],"particular,":[177],"both":[179],"bandits":[181],"RL,":[183],"fast":[187],"convergence":[188],"nearly-expert":[191],"analogous":[193],"achieved":[197,206],"by":[198],"learning,":[200],"contrary":[201],"slow":[204],"prove":[214],"adaptively":[218,243],"entire":[222],"range,":[225],"achieving":[226],"smooth":[228],"transition":[229],"further":[237],"show":[238],"almost":[242],"MDPs.":[246]},"counts_by_year":[{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":17}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
