{"id":"https://openalex.org/W3006151906","doi":"https://doi.org/10.24963/ijcai.2020/391","title":"BRPO: Batch Residual Policy Optimization","display_name":"BRPO: Batch Residual Policy Optimization","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3006151906","doi":"https://doi.org/10.24963/ijcai.2020/391","mag":"3006151906"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2020/391","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/391","pdf_url":"https://www.ijcai.org/proceedings/2020/0391.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2020/0391.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019504524","display_name":"Sungryull Sohn","orcid":"https://orcid.org/0000-0001-7733-4293"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sungryull Sohn","raw_affiliation_strings":["Google Research","University of Michigan"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039948454","display_name":"Yinlam Chow","orcid":"https://orcid.org/0000-0002-7533-8300"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yinlam Chow","raw_affiliation_strings":["Google Research","Google Research,"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google Research,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069722453","display_name":"Jayden Ooi","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jayden Ooi","raw_affiliation_strings":["Google Research","Google Research,"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google Research,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057773393","display_name":"Ofir Nachum","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ofir Nachum","raw_affiliation_strings":["Google Research","Google Research,"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google Research,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108652283","display_name":"Honglak Lee","orcid":"https://orcid.org/0000-0002-1279-0068"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Honglak Lee","raw_affiliation_strings":["Google Research","University of Michigan"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028125399","display_name":"Ed H.","orcid":"https://orcid.org/0000-0003-3230-5338"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ed Chi","raw_affiliation_strings":["Google Research","Google Research,"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google Research,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036934218","display_name":"Craig Boutilier","orcid":"https://orcid.org/0000-0001-9330-4545"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Craig Boutilier","raw_affiliation_strings":["Google Research","Google Research,"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google Research,","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5019504524"],"corresponding_institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I27837315"],"apc_list":null,"apc_paid":null,"fwci":0.73429774,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.74862546,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2824","last_page":"2830"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9722999930381775,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7871257066726685},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.77894127368927},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.7696653604507446},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6469295620918274},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.6302916407585144},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5289504528045654},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.43760496377944946},{"id":"https://openalex.org/keywords/distribution","display_name":"Distribution (mathematics)","score":0.4280599355697632},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3426024913787842},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.23185107111930847},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19223320484161377},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.08296561241149902}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7871257066726685},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.77894127368927},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.7696653604507446},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6469295620918274},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.6302916407585144},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5289504528045654},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.43760496377944946},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.4280599355697632},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3426024913787842},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23185107111930847},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19223320484161377},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.08296561241149902},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.24963/ijcai.2020/391","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/391","pdf_url":"https://www.ijcai.org/proceedings/2020/0391.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2002.05522","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2002.05522","pdf_url":"https://arxiv.org/pdf/2002.05522","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3006151906","is_oa":true,"landing_page_url":"http://arxiv.org/pdf/2002.05522.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2002.05522","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2002.05522","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2020/391","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/391","pdf_url":"https://www.ijcai.org/proceedings/2020/0391.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.6499999761581421}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3006151906.pdf","grobid_xml":"https://content.openalex.org/works/W3006151906.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W192920577","https://openalex.org/W1541527977","https://openalex.org/W1575592356","https://openalex.org/W1745373831","https://openalex.org/W1757796397","https://openalex.org/W1968154520","https://openalex.org/W2007817872","https://openalex.org/W2021530792","https://openalex.org/W2071841410","https://openalex.org/W2107741520","https://openalex.org/W2121863487","https://openalex.org/W2149414429","https://openalex.org/W2155968351","https://openalex.org/W2173248099","https://openalex.org/W2296319761","https://openalex.org/W2472245206","https://openalex.org/W2554120691","https://openalex.org/W2592457170","https://openalex.org/W2781726626","https://openalex.org/W2789965037","https://openalex.org/W2898621204","https://openalex.org/W2904453761","https://openalex.org/W2904746163","https://openalex.org/W2963836326","https://openalex.org/W2968800739","https://openalex.org/W2971262355","https://openalex.org/W2979211489","https://openalex.org/W3030163244","https://openalex.org/W3189892487","https://openalex.org/W6607789263","https://openalex.org/W6634239167","https://openalex.org/W6638018090","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W3034274799","https://openalex.org/W2753569380","https://openalex.org/W2988703099","https://openalex.org/W3009509472","https://openalex.org/W2736601468","https://openalex.org/W3095986187","https://openalex.org/W2763287778","https://openalex.org/W3085501055","https://openalex.org/W2145288976","https://openalex.org/W3124201714","https://openalex.org/W2950849043","https://openalex.org/W2963501401","https://openalex.org/W2766232148","https://openalex.org/W2959356899","https://openalex.org/W2126282678","https://openalex.org/W3164473225","https://openalex.org/W2889893435","https://openalex.org/W3121832314","https://openalex.org/W3033120977","https://openalex.org/W2911648082"],"abstract_inverted_index":{"In":[0],"batch":[1,46],"reinforcement":[2],"learning":[3],"(RL),":[4],"one":[5],"often":[6],"constrains":[7],"a":[8,88,105,120],"learned":[9,23,82],"policy":[10,31,56,83,98,109],"to":[11,14,26,48,53],"be":[12,49],"close":[13],"the":[15,22,29,38,77,81,97,116],"behavior":[16,30],"(data-generating)":[17],"policy,":[18],"e.g.,":[19],"by":[20,32],"constraining":[21],"action":[24],"distribution":[25],"differ":[27],"from":[28],"some":[33],"maximum":[34],"degree":[35],"that":[36,102,113],"is":[37,84],"same":[39],"at":[40,58,66],"each":[41],"state.":[42],"This":[43],"can":[44],"cause":[45],"RL":[47,91],"overly":[50],"conservative,":[51],"unable":[52],"exploit":[54],"large":[55],"changes":[57],"frequently-visited,":[59],"high-confidence":[60],"states":[61],"without":[62],"risking":[63],"poor":[64],"performance":[65,118],"sparsely-visited":[67],"states.":[68],"To":[69],"remedy":[70],"this,":[71],"we":[72],"propose":[73],"residual":[74],"policies,":[75],"where":[76],"allowable":[78,100],"deviation":[79,101],"of":[80,122],"state-action-dependent.":[85],"We":[86,111],"derive":[87],"new":[89],"for":[90],"method,":[92],"BRPO,":[93],"which":[94],"learns":[95],"both":[96],"and":[99],"jointly":[103],"maximize":[104],"lower":[106],"bound":[107],"on":[108],"performance.":[110],"show":[112],"BRPO":[114],"achieves":[115],"state-of-the-art":[117],"in":[119],"number":[121],"tasks.":[123]},"counts_by_year":[{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
