{"id":"https://openalex.org/W2793020180","doi":"https://doi.org/10.1109/tnnls.2017.2764499","title":"Efficient Reinforcement Learning via Probabilistic Trajectory Optimization","display_name":"Efficient Reinforcement Learning via Probabilistic Trajectory Optimization","publication_year":2018,"publication_date":"2018-03-05","ids":{"openalex":"https://openalex.org/W2793020180","doi":"https://doi.org/10.1109/tnnls.2017.2764499","mag":"2793020180","pmid":"https://pubmed.ncbi.nlm.nih.gov/29993609"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2017.2764499","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2017.2764499","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091291585","display_name":"Yunpeng Pan","orcid":"https://orcid.org/0000-0002-3373-0769"},"institutions":[{"id":"https://openalex.org/I72427458","display_name":"JDSU (United States)","ror":"https://ror.org/01a5v8x09","country_code":"US","type":"company","lineage":["https://openalex.org/I72427458"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yunpeng Pan","raw_affiliation_strings":["JD-X Silicon Valley Research Center, JD.com American Technologies Corporation, Santa Clara, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-3373-0769","affiliations":[{"raw_affiliation_string":"JD-X Silicon Valley Research Center, JD.com American Technologies Corporation, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I72427458"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043052033","display_name":"George I. Boutselis","orcid":"https://orcid.org/0000-0002-0930-3257"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"George I. Boutselis","raw_affiliation_strings":["Department of Aerospace Engineering, Georgia Institute of Technology, Atlanta, GA, USA"],"raw_orcid":"https://orcid.org/0000-0002-0930-3257","affiliations":[{"raw_affiliation_string":"Department of Aerospace Engineering, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044505993","display_name":"Evangelos A. Theodorou","orcid":"https://orcid.org/0000-0002-0834-5738"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Evangelos A. Theodorou","raw_affiliation_strings":["Department of Aerospace Engineering, Georgia Institute of Technology, Atlanta, GA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Aerospace Engineering, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.3654,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.91077318,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"29","issue":"11","first_page":"5459","last_page":"5474"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11236","display_name":"Control Systems and Identification","score":0.9793999791145325,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7346084117889404},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6544007658958435},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6282708644866943},{"id":"https://openalex.org/keywords/differential-dynamic-programming","display_name":"Differential dynamic programming","score":0.6270055770874023},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6260408759117126},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5929532051086426},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5740056037902832},{"id":"https://openalex.org/keywords/trajectory-optimization","display_name":"Trajectory optimization","score":0.5319538116455078},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.5085726976394653},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.48034560680389404},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.4370565414428711},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3720989227294922},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.34898829460144043},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34283921122550964},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.3215405344963074},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.278123140335083}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7346084117889404},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6544007658958435},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6282708644866943},{"id":"https://openalex.org/C92292787","wikidata":"https://www.wikidata.org/wiki/Q5275342","display_name":"Differential dynamic programming","level":3,"score":0.6270055770874023},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6260408759117126},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5929532051086426},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5740056037902832},{"id":"https://openalex.org/C173246807","wikidata":"https://www.wikidata.org/wiki/Q7833062","display_name":"Trajectory optimization","level":3,"score":0.5319538116455078},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.5085726976394653},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.48034560680389404},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.4370565414428711},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3720989227294922},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34898829460144043},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34283921122550964},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.3215405344963074},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.278123140335083},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2017.2764499","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2017.2764499","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:29993609","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/29993609","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8430565525","display_name":"NRI: Information-Theoretic Trajectory Optimization for Motion Planning and Control with Applications to Space Proximity Operations","funder_award_id":"1426945","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G916882608","display_name":null,"funder_award_id":"W911NF-16-1-0390","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":85,"referenced_works":["https://openalex.org/W36550046","https://openalex.org/W658381347","https://openalex.org/W1499669280","https://openalex.org/W1589395967","https://openalex.org/W1601974704","https://openalex.org/W1603365166","https://openalex.org/W1746819321","https://openalex.org/W1854776945","https://openalex.org/W1925816294","https://openalex.org/W1964946446","https://openalex.org/W1998179438","https://openalex.org/W2012587148","https://openalex.org/W2018705428","https://openalex.org/W2019941214","https://openalex.org/W2025752438","https://openalex.org/W2046633936","https://openalex.org/W2056257343","https://openalex.org/W2058702345","https://openalex.org/W2060645918","https://openalex.org/W2070676433","https://openalex.org/W2072378688","https://openalex.org/W2085330455","https://openalex.org/W2099768828","https://openalex.org/W2104533781","https://openalex.org/W2108631580","https://openalex.org/W2109944946","https://openalex.org/W2111999018","https://openalex.org/W2117629901","https://openalex.org/W2122377431","https://openalex.org/W2125612430","https://openalex.org/W2129564505","https://openalex.org/W2130913800","https://openalex.org/W2140135625","https://openalex.org/W2146851580","https://openalex.org/W2151268438","https://openalex.org/W2152842312","https://openalex.org/W2153917213","https://openalex.org/W2156045111","https://openalex.org/W2162988958","https://openalex.org/W2163000344","https://openalex.org/W2164429173","https://openalex.org/W2167856595","https://openalex.org/W2168318917","https://openalex.org/W2169209873","https://openalex.org/W2170912685","https://openalex.org/W2172114485","https://openalex.org/W2211399972","https://openalex.org/W2296226496","https://openalex.org/W2400458653","https://openalex.org/W2468321057","https://openalex.org/W2509546504","https://openalex.org/W2510352861","https://openalex.org/W2571024504","https://openalex.org/W2615346176","https://openalex.org/W2735992950","https://openalex.org/W2742077738","https://openalex.org/W2770616710","https://openalex.org/W2904816695","https://openalex.org/W2949368738","https://openalex.org/W2963458639","https://openalex.org/W2963945780","https://openalex.org/W3005581722","https://openalex.org/W4205513846","https://openalex.org/W4211049957","https://openalex.org/W4297785478","https://openalex.org/W6601440555","https://openalex.org/W6629881138","https://openalex.org/W6635991215","https://openalex.org/W6640290305","https://openalex.org/W6674989108","https://openalex.org/W6675740164","https://openalex.org/W6676444349","https://openalex.org/W6676801769","https://openalex.org/W6678278778","https://openalex.org/W6679524480","https://openalex.org/W6680657880","https://openalex.org/W6683259483","https://openalex.org/W6685013676","https://openalex.org/W6685331716","https://openalex.org/W6687965526","https://openalex.org/W6719477708","https://openalex.org/W6742417552","https://openalex.org/W6746274434","https://openalex.org/W7038125596","https://openalex.org/W7066478640"],"related_works":["https://openalex.org/W1932159282","https://openalex.org/W4285537323","https://openalex.org/W4281717838","https://openalex.org/W2379312070","https://openalex.org/W4315488747","https://openalex.org/W2136173754","https://openalex.org/W2212478920","https://openalex.org/W2007971149","https://openalex.org/W2110050003","https://openalex.org/W1676686791"],"abstract_inverted_index":{"We":[0,89,124],"present":[1],"a":[2,37,54,59,82,107,139,147],"trajectory":[3,39],"optimization":[4,109],"approach":[5],"to":[6,81,102,120],"reinforcement":[7],"learning":[8,151],"in":[9,40],"continuous":[10],"state":[11],"and":[12,30,57,106,128,155],"action":[13],"spaces,":[14],"called":[15],"probabilistic":[16],"differential":[17],"dynamic":[18,33],"programming":[19,34],"(PDDP).":[20],"Our":[21],"method":[22],"represents":[23],"systems":[24],"dynamics":[25],"using":[26,134],"Gaussian":[27,41],"processes":[28],"(GPs),":[29],"performs":[31],"local":[32],"iteratively":[35],"around":[36],"nominal":[38],"belief":[42],"spaces.":[43],"Different":[44],"from":[45],"model-based":[46],"policy":[47,55,62,142],"search":[48,143],"methods,":[49],"PDDP":[50,145],"does":[51],"not":[52],"require":[53],"parameterization":[56],"learns":[58],"time-varying":[60],"control":[61],"via":[63],"successive":[64],"forward-backward":[65],"sweeps.":[66],"A":[67],"convergence":[68],"analysis":[69],"of":[70,130,150],"the":[71,99,113,126,131],"iterative":[72],"scheme":[73],"is":[74],"given,":[75],"showing":[76],"that":[77,91],"our":[78],"algorithm":[79,133],"converges":[80],"stationary":[83],"point":[84],"globally":[85],"under":[86],"certain":[87],"conditions.":[88],"show":[90],"prior":[92],"model":[93],"knowledge":[94],"can":[95,117],"be":[96,118],"incorporated":[97],"into":[98],"proposed":[100,132],"framework":[101],"speed":[103],"up":[104],"learning,":[105],"generalized":[108],"criterion":[110],"based":[111],"on":[112],"predicted":[114],"cost":[115],"distribution":[116],"employed":[119],"enable":[121],"risk-sensitive":[122],"learning.":[123],"demonstrate":[125],"effectiveness":[127],"efficiency":[129],"nontrivial":[135],"tasks.":[136],"Compared":[137],"with":[138],"state-of-the-art":[140],"GP-based":[141],"method,":[144],"offers":[146],"superior":[148],"combination":[149],"speed,":[152],"data":[153],"efficiency,":[154],"applicability.":[156]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3}],"updated_date":"2026-06-22T08:00:12.763002","created_date":"2025-10-10T00:00:00"}