{"id":"https://openalex.org/W2288926313","doi":"https://doi.org/10.1109/icra.2017.7989379","title":"PLATO: Policy learning using adaptive trajectory optimization","display_name":"PLATO: Policy learning using adaptive trajectory optimization","publication_year":2017,"publication_date":"2017-05-01","ids":{"openalex":"https://openalex.org/W2288926313","doi":"https://doi.org/10.1109/icra.2017.7989379","mag":"2288926313"},"language":"en","primary_location":{"id":"doi:10.1109/icra.2017.7989379","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2017.7989379","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1603.00622","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064156720","display_name":"Gregory Kahn","orcid":"https://orcid.org/0000-0003-1771-6147"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gregory Kahn","raw_affiliation_strings":["Berkeley AI Research (BAIR), University of California, Berkeley","[Berkeley AI Research (BAIR), University of California, Berkeley]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Berkeley AI Research (BAIR), University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"[Berkeley AI Research (BAIR), University of California, Berkeley]","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100669565","display_name":"Tianhao Zhang","orcid":"https://orcid.org/0000-0002-5939-3932"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianhao Zhang","raw_affiliation_strings":["Berkeley AI Research (BAIR), University of California, Berkeley","[Berkeley AI Research (BAIR), University of California, Berkeley]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Berkeley AI Research (BAIR), University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"[Berkeley AI Research (BAIR), University of California, Berkeley]","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sergey Levine","raw_affiliation_strings":["Berkeley AI Research (BAIR), University of California, Berkeley","[Berkeley AI Research (BAIR), University of California, Berkeley]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Berkeley AI Research (BAIR), University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"[Berkeley AI Research (BAIR), University of California, Berkeley]","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049349154","display_name":"Pieter Abbeel","orcid":null},"institutions":[{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]},{"id":"https://openalex.org/I4210161460","display_name":"OpenAI (United States)","ror":"https://ror.org/05wx9n238","country_code":"US","type":"company","lineage":["https://openalex.org/I4210161460"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pieter Abbeel","raw_affiliation_strings":["Berkeley AI Research (BAIR), University of California, Berkeley","International Computer Science Institute (ICSI)","OpenAI","[Berkeley AI Research (BAIR), University of California, Berkeley]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Berkeley AI Research (BAIR), University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"International Computer Science Institute (ICSI)","institution_ids":["https://openalex.org/I1297971548"]},{"raw_affiliation_string":"OpenAI","institution_ids":["https://openalex.org/I4210161460"]},{"raw_affiliation_string":"[Berkeley AI Research (BAIR), University of California, Berkeley]","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.4784,"has_fulltext":true,"cited_by_count":15,"citation_normalized_percentile":{"value":0.914254,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3342","last_page":"3349"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6821429133415222},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.640720784664154},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6078445911407471},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5677222013473511},{"id":"https://openalex.org/keywords/model-predictive-control","display_name":"Model predictive control","score":0.5613895058631897},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5430338978767395},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5088884830474854},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4712895452976227},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.4591353237628937},{"id":"https://openalex.org/keywords/train","display_name":"Train","score":0.45849788188934326},{"id":"https://openalex.org/keywords/trajectory-optimization","display_name":"Trajectory optimization","score":0.4545312821865082},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.44150933623313904},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.41221147775650024},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.28610557317733765},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.2548878788948059},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10909014940261841},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.08442819118499756}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6821429133415222},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.640720784664154},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6078445911407471},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5677222013473511},{"id":"https://openalex.org/C172205157","wikidata":"https://www.wikidata.org/wiki/Q1782962","display_name":"Model predictive control","level":3,"score":0.5613895058631897},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5430338978767395},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5088884830474854},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4712895452976227},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.4591353237628937},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.45849788188934326},{"id":"https://openalex.org/C173246807","wikidata":"https://www.wikidata.org/wiki/Q7833062","display_name":"Trajectory optimization","level":3,"score":0.4545312821865082},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44150933623313904},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.41221147775650024},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.28610557317733765},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2548878788948059},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10909014940261841},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.08442819118499756},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icra.2017.7989379","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2017.7989379","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1603.00622","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1603.00622","pdf_url":"https://arxiv.org/pdf/1603.00622","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2288926313","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1603.00622.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1603.00622","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1603.00622","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1603.00622","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1603.00622","pdf_url":"https://arxiv.org/pdf/1603.00622","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.6800000071525574,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2288926313.pdf","grobid_xml":"https://content.openalex.org/works/W2288926313.grobid-xml"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W1499669280","https://openalex.org/W1600293573","https://openalex.org/W1665214252","https://openalex.org/W1757796397","https://openalex.org/W1771410628","https://openalex.org/W1923344279","https://openalex.org/W1931823119","https://openalex.org/W1980969546","https://openalex.org/W1981723834","https://openalex.org/W2012587148","https://openalex.org/W2028893450","https://openalex.org/W2098284983","https://openalex.org/W2104733512","https://openalex.org/W2107662876","https://openalex.org/W2119112357","https://openalex.org/W2121103318","https://openalex.org/W2147768505","https://openalex.org/W2151210636","https://openalex.org/W2152588577","https://openalex.org/W2155893237","https://openalex.org/W2166644671","https://openalex.org/W2167856595","https://openalex.org/W2169397395","https://openalex.org/W2296673577","https://openalex.org/W2919115771","https://openalex.org/W2949608212","https://openalex.org/W2949888546","https://openalex.org/W2950094539","https://openalex.org/W2962957031","https://openalex.org/W2963864421","https://openalex.org/W2964006217","https://openalex.org/W2964121744","https://openalex.org/W2964161785","https://openalex.org/W4205513846","https://openalex.org/W6631190155","https://openalex.org/W6637242042","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6639949747","https://openalex.org/W6640174482","https://openalex.org/W6674909112","https://openalex.org/W6675999342","https://openalex.org/W6678367057","https://openalex.org/W6682314434","https://openalex.org/W6682539193","https://openalex.org/W6682849425","https://openalex.org/W6684921986","https://openalex.org/W6685113463"],"related_works":["https://openalex.org/W2964070888","https://openalex.org/W2964161785","https://openalex.org/W2396217537","https://openalex.org/W2604173613","https://openalex.org/W3034552332","https://openalex.org/W3203595923","https://openalex.org/W2895958971","https://openalex.org/W2757240943","https://openalex.org/W3029839358","https://openalex.org/W2914871904","https://openalex.org/W3179660843","https://openalex.org/W2528734395","https://openalex.org/W2999490157","https://openalex.org/W2765397130","https://openalex.org/W3022124161","https://openalex.org/W3174070947","https://openalex.org/W2911130482","https://openalex.org/W3114937401","https://openalex.org/W2513373085","https://openalex.org/W3097654316"],"abstract_inverted_index":{"Policy":[0],"search":[1],"can":[2,33,204],"in":[3,101,130,210],"principle":[4],"acquire":[5,35],"complex":[6,47,85],"strategies":[7],"for":[8],"control":[9,71,86,93],"of":[10,103,122,182,194,221],"robots":[11],"and":[12,29,41,108,244],"other":[13],"autonomous":[14],"systems.":[15,73],"When":[16],"the":[17,97,120,127,145,151,168,195],"policy":[18,52,129,170],"is":[19],"trained":[20,107],"to":[21,65,70,95,118,124,132,141,157,190,230,247],"process":[22],"raw":[23],"sensory":[24],"inputs,":[25],"such":[26,46,54],"as":[27,55,154],"images":[28],"depth":[30],"maps,":[31],"it":[32,172],"also":[34,149,199],"a":[36,56,77,105,219,248],"strategy":[37],"that":[38,83,138,162,179,188,202],"combines":[39],"perception":[40],"control.":[42],"However,":[43],"effectively":[44],"processing":[45],"inputs":[48],"requires":[49],"an":[50,114,155],"expressive":[51],"class,":[53],"large":[57],"neural":[58],"network.":[59],"These":[60],"high-dimensional":[61],"policies":[62,87],"are":[63,139],"difficult":[64],"train,":[66],"especially":[67],"when":[68],"learning":[69,81],"safety-critical":[72],"We":[74,177,198],"propose":[75],"PLATO,":[76],"continuous,":[78],"reset-free":[79],"reinforcement":[80],"algorithm":[82],"trains":[84],"with":[88],"supervised":[89],"learning,":[90],"using":[91],"model-predictive":[92],"(MPC)":[94],"generate":[96,133],"supervision,":[98],"hence":[99],"never":[100],"need":[102],"running":[104],"partially":[106],"potentially":[109],"unsafe":[110],"policy.":[111,147,197,250],"PLATO":[112,148,233],"uses":[113],"adaptive":[115,183],"training":[116,134],"method":[117],"modify":[119],"behavior":[121],"MPC":[123,152,184,203],"gradually":[125],"match":[126],"learned":[128,146,169],"order":[131],"samples":[135],"at":[136],"states":[137],"likely":[140],"be":[142],"visited":[143],"by":[144],"maintains":[150],"cost":[153],"objective":[156],"avoid":[158,206],"highly":[159],"undesirable":[160],"actions":[161,209],"would":[163],"result":[164],"from":[165],"strictly":[166],"following":[167],"before":[171],"has":[173],"been":[174],"fully":[175],"trained.":[176],"prove":[178],"this":[180],"type":[181],"expert":[185],"produces":[186],"supervision":[187],"leads":[189],"good":[191],"long-horizon":[192],"performance":[193],"resulting":[196],"empirically":[200],"demonstrate":[201,227],"still":[205],"dangerous":[207],"on-policy":[208],"unexpected":[211],"situations":[212],"during":[213,242],"training.":[214],"Our":[215],"empirical":[216],"results":[217],"on":[218],"set":[220],"challenging":[222],"simulated":[223],"aerial":[224],"vehicle":[225],"tasks":[226],"that,":[228],"compared":[229],"prior":[231],"methods,":[232],"learns":[234],"faster,":[235],"experiences":[236],"substantially":[237],"fewer":[238],"catastrophic":[239],"failures":[240],"(crashes)":[241],"training,":[243],"often":[245],"converges":[246],"better":[249]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
