{"id":"https://openalex.org/W3001359952","doi":"https://doi.org/10.1109/icra40945.2020.9197058","title":"Local Policy Optimization for Trajectory-Centric Reinforcement Learning","display_name":"Local Policy Optimization for Trajectory-Centric Reinforcement Learning","publication_year":2020,"publication_date":"2020-05-01","ids":{"openalex":"https://openalex.org/W3001359952","doi":"https://doi.org/10.1109/icra40945.2020.9197058","mag":"3001359952"},"language":"en","primary_location":{"id":"doi:10.1109/icra40945.2020.9197058","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra40945.2020.9197058","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2001.08092","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065878109","display_name":"Patrik Kolaric","orcid":"https://orcid.org/0000-0002-4332-3462"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Patrik Kolaric","raw_affiliation_strings":["UTA Research Institute, University of Texas at Arlington, Fort Worth, TX, USA","University of Texas at Arlington"],"affiliations":[{"raw_affiliation_string":"UTA Research Institute, University of Texas at Arlington, Fort Worth, TX, USA","institution_ids":[]},{"raw_affiliation_string":"University of Texas at Arlington","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010530960","display_name":"Devesh K. Jha","orcid":"https://orcid.org/0000-0002-7843-9545"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]},{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Devesh K. Jha","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","University of Texas at Arlington"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"University of Texas at Arlington","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038062267","display_name":"Arvind U. Raghunathan","orcid":"https://orcid.org/0000-0003-3173-3875"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]},{"id":"https://openalex.org/I4210133125","display_name":"Mitsubishi Electric (Japan)","ror":"https://ror.org/033y26782","country_code":"JP","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125"]}],"countries":["JP","US"],"is_corresponding":false,"raw_author_name":"Arvind U. Raghunathan","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","Mitsubishi Electric Research Laboratories,#TAB#"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories,#TAB#","institution_ids":["https://openalex.org/I4210133125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016137188","display_name":"Frank L. Lewis","orcid":"https://orcid.org/0000-0003-4074-1615"},"institutions":[{"id":"https://openalex.org/I4210133125","display_name":"Mitsubishi Electric (Japan)","ror":"https://ror.org/033y26782","country_code":"JP","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Frank L. Lewis","raw_affiliation_strings":["UTA Research Institute, University of Texas at Arlington, Fort Worth, TX, USA","Mitsubishi Electric Research Laboratories,#TAB#"],"affiliations":[{"raw_affiliation_string":"UTA Research Institute, University of Texas at Arlington, Fort Worth, TX, USA","institution_ids":[]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories,#TAB#","institution_ids":["https://openalex.org/I4210133125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053957972","display_name":"Mouhacine Benosman","orcid":"https://orcid.org/0000-0002-0154-454X"},"institutions":[{"id":"https://openalex.org/I4210133125","display_name":"Mitsubishi Electric (Japan)","ror":"https://ror.org/033y26782","country_code":"JP","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125"]},{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["JP","US"],"is_corresponding":false,"raw_author_name":"Mouhacine Benosman","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","Mitsubishi Electric Research Laboratories,#TAB#"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories,#TAB#","institution_ids":["https://openalex.org/I4210133125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062272788","display_name":"Diego Romeres","orcid":"https://orcid.org/0000-0002-8603-2438"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]},{"id":"https://openalex.org/I4210133125","display_name":"Mitsubishi Electric (Japan)","ror":"https://ror.org/033y26782","country_code":"JP","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125"]}],"countries":["JP","US"],"is_corresponding":false,"raw_author_name":"Diego Romeres","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","Mitsubishi Electric Research Laboratories,#TAB#"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories,#TAB#","institution_ids":["https://openalex.org/I4210133125"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054931604","display_name":"Daniel Nikovski","orcid":"https://orcid.org/0000-0003-2919-645X"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]},{"id":"https://openalex.org/I4210133125","display_name":"Mitsubishi Electric (Japan)","ror":"https://ror.org/033y26782","country_code":"JP","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125"]}],"countries":["JP","US"],"is_corresponding":false,"raw_author_name":"Daniel Nikovski","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","Mitsubishi Electric Research Laboratories,#TAB#"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories,#TAB#","institution_ids":["https://openalex.org/I4210133125"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5065878109"],"corresponding_institution_ids":["https://openalex.org/I189196454"],"apc_list":null,"apc_paid":null,"fwci":0.2743,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.6203214,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"5094","last_page":"5100"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.842666506767273},{"id":"https://openalex.org/keywords/trajectory-optimization","display_name":"Trajectory optimization","score":0.840343713760376},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8240032196044922},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6862421035766602},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5562364459037781},{"id":"https://openalex.org/keywords/nonlinear-programming","display_name":"Nonlinear programming","score":0.5337888598442078},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.5271764993667603},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5225769281387329},{"id":"https://openalex.org/keywords/local-optimum","display_name":"Local optimum","score":0.4791383445262909},{"id":"https://openalex.org/keywords/global-optimization","display_name":"Global optimization","score":0.4499174952507019},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.3899586796760559},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3725128173828125},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.25486379861831665},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18407166004180908},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1122279167175293}],"concepts":[{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.842666506767273},{"id":"https://openalex.org/C173246807","wikidata":"https://www.wikidata.org/wiki/Q7833062","display_name":"Trajectory optimization","level":3,"score":0.840343713760376},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8240032196044922},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6862421035766602},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5562364459037781},{"id":"https://openalex.org/C115527620","wikidata":"https://www.wikidata.org/wiki/Q769909","display_name":"Nonlinear programming","level":3,"score":0.5337888598442078},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.5271764993667603},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5225769281387329},{"id":"https://openalex.org/C141934464","wikidata":"https://www.wikidata.org/wiki/Q3305386","display_name":"Local optimum","level":2,"score":0.4791383445262909},{"id":"https://openalex.org/C164752517","wikidata":"https://www.wikidata.org/wiki/Q5570875","display_name":"Global optimization","level":2,"score":0.4499174952507019},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.3899586796760559},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3725128173828125},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.25486379861831665},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18407166004180908},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1122279167175293},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icra40945.2020.9197058","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra40945.2020.9197058","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2001.08092","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.08092","pdf_url":"https://arxiv.org/pdf/2001.08092","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3001359952","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2001.08092","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2001.08092","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2001.08092","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2001.08092","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.08092","pdf_url":"https://arxiv.org/pdf/2001.08092","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3001359952.pdf"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W1502922572","https://openalex.org/W1543439990","https://openalex.org/W1742307920","https://openalex.org/W1925816294","https://openalex.org/W2046210054","https://openalex.org/W2087617385","https://openalex.org/W2100538121","https://openalex.org/W2104733512","https://openalex.org/W2121863487","https://openalex.org/W2136719407","https://openalex.org/W2140135625","https://openalex.org/W2148247550","https://openalex.org/W2164278908","https://openalex.org/W2257979135","https://openalex.org/W2529601334","https://openalex.org/W2738778707","https://openalex.org/W2766447205","https://openalex.org/W2786936262","https://openalex.org/W2798766386","https://openalex.org/W2953708620","https://openalex.org/W2962872206","https://openalex.org/W2963630259","https://openalex.org/W2964161785","https://openalex.org/W2967651253","https://openalex.org/W2969101033","https://openalex.org/W2972785326","https://openalex.org/W4250739957","https://openalex.org/W4292363360","https://openalex.org/W6629804754","https://openalex.org/W6640290305","https://openalex.org/W6675999342","https://openalex.org/W6680657880","https://openalex.org/W6682849425","https://openalex.org/W6764980988","https://openalex.org/W6765032376","https://openalex.org/W6765478815","https://openalex.org/W6768147641"],"related_works":["https://openalex.org/W3091489580","https://openalex.org/W311349729","https://openalex.org/W3158234969","https://openalex.org/W3119289712","https://openalex.org/W2564368211","https://openalex.org/W3205295347","https://openalex.org/W2183483151","https://openalex.org/W2921180173","https://openalex.org/W2184847629","https://openalex.org/W62487724","https://openalex.org/W2312892126","https://openalex.org/W3111862348","https://openalex.org/W2796922012","https://openalex.org/W2145537262","https://openalex.org/W3101622906","https://openalex.org/W3197927210","https://openalex.org/W3004785596","https://openalex.org/W2153054053","https://openalex.org/W3096721080","https://openalex.org/W2970112030"],"abstract_inverted_index":{"The":[0],"goal":[1],"of":[2,54,105,125,139],"this":[3],"paper":[4],"is":[5,29,118],"to":[6,18,71,101],"present":[7],"a":[8,43,52,65,113],"method":[9],"for":[10,22,38,132],"simultaneous":[11],"trajectory":[12,80,106],"and":[13,49,60,108],"local":[14,20,109],"stabilizing":[15],"policy":[16,36,110],"optimization":[17,37,81,107,115],"generate":[19],"policies":[21],"trajectory-centric":[23],"model-based":[24],"reinforcement":[25],"learning":[26],"(MBRL).":[27],"This":[28],"motivated":[30],"by":[31,96],"the":[32,74,92,103,140],"fact":[33],"that":[34],"global":[35,66],"non-linear":[39],"systems":[40],"could":[41],"be":[42],"very":[44,86],"challenging":[45],"problem":[46,104],"both":[47],"algorithmically":[48],"numerically.":[50],"However,":[51],"lot":[53],"robotic":[55],"manipulation":[56],"tasks":[57],"are":[58],"trajectory-centric,":[59],"thus":[61],"do":[62],"not":[63],"require":[64],"model":[67,76],"or":[68],"policy.":[69],"Due":[70],"inaccuracies":[72],"in":[73,85],"learned":[75],"estimates,":[77],"an":[78,123],"open-loop":[79],"process":[82],"mostly":[83],"results":[84,131],"poor":[87],"performance":[88,138],"when":[89],"used":[90],"on":[91],"real":[93],"system.":[94],"Motivated":[95],"these":[97],"problems,":[98],"we":[99],"try":[100],"formulate":[102],"synthesis":[111],"as":[112,122,134,136],"single":[114],"problem.":[116],"It":[117],"then":[119],"solved":[120],"simultaneously":[121],"instance":[124],"nonlinear":[126],"programming.":[127],"We":[128],"provide":[129],"some":[130,144],"analysis":[133],"well":[135],"achieved":[137],"proposed":[141],"technique":[142],"under":[143],"simplifying":[145],"assumptions.":[146]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
