{"id":"https://openalex.org/W1923344279","doi":"https://doi.org/10.1109/icra.2016.7487175","title":"Learning deep control policies for autonomous aerial vehicles with MPC-guided policy search","display_name":"Learning deep control policies for autonomous aerial vehicles with MPC-guided policy search","publication_year":2016,"publication_date":"2016-05-01","ids":{"openalex":"https://openalex.org/W1923344279","doi":"https://doi.org/10.1109/icra.2016.7487175","mag":"1923344279"},"language":"en","primary_location":{"id":"doi:10.1109/icra.2016.7487175","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2016.7487175","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100669565","display_name":"Tianhao Zhang","orcid":"https://orcid.org/0000-0002-5939-3932"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tianhao Zhang","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of California Berkeley, Berkeley, CA","Department of Electrical Engineering and Computer Science, University of California, Berkeley, 94720, United States of America"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of California, Berkeley, 94720, United States of America","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064156720","display_name":"Gregory Kahn","orcid":"https://orcid.org/0000-0003-1771-6147"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gregory Kahn","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of California Berkeley, Berkeley, CA","Department of Electrical Engineering and Computer Science, University of California, Berkeley, 94720, United States of America"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of California, Berkeley, 94720, United States of America","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sergey Levine","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of California Berkeley, Berkeley, CA","Department of Electrical Engineering and Computer Science, University of California, Berkeley, 94720, United States of America"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of California, Berkeley, 94720, United States of America","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049349154","display_name":"Pieter Abbeel","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pieter Abbeel","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of California Berkeley, Berkeley, CA","Department of Electrical Engineering and Computer Science, University of California, Berkeley, 94720, United States of America"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of California, Berkeley, 94720, United States of America","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100669565"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":47.6692,"has_fulltext":false,"cited_by_count":421,"citation_normalized_percentile":{"value":0.9994047,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"528","last_page":"535"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8224372863769531},{"id":"https://openalex.org/keywords/model-predictive-control","display_name":"Model predictive control","score":0.7436544299125671},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.655927300453186},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6344362497329712},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5704376697540283},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5675848722457886},{"id":"https://openalex.org/keywords/obstacle-avoidance","display_name":"Obstacle avoidance","score":0.5375032424926758},{"id":"https://openalex.org/keywords/obstacle","display_name":"Obstacle","score":0.5011575222015381},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.46671006083488464},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4576645493507385},{"id":"https://openalex.org/keywords/control-engineering","display_name":"Control engineering","score":0.4569374620914459},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.42808374762535095},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.42323848605155945},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3411716818809509},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.28169554471969604},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.22445479035377502}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8224372863769531},{"id":"https://openalex.org/C172205157","wikidata":"https://www.wikidata.org/wiki/Q1782962","display_name":"Model predictive control","level":3,"score":0.7436544299125671},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.655927300453186},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6344362497329712},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5704376697540283},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5675848722457886},{"id":"https://openalex.org/C6683253","wikidata":"https://www.wikidata.org/wiki/Q7075535","display_name":"Obstacle avoidance","level":4,"score":0.5375032424926758},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.5011575222015381},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.46671006083488464},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4576645493507385},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.4569374620914459},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.42808374762535095},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.42323848605155945},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3411716818809509},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.28169554471969604},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.22445479035377502},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra.2016.7487175","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2016.7487175","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W195033972","https://openalex.org/W658381347","https://openalex.org/W1520597402","https://openalex.org/W1522301498","https://openalex.org/W1570233100","https://openalex.org/W1576721254","https://openalex.org/W1605218591","https://openalex.org/W1665214252","https://openalex.org/W1704276703","https://openalex.org/W1757796397","https://openalex.org/W1931877416","https://openalex.org/W1969705022","https://openalex.org/W1977655452","https://openalex.org/W1980969546","https://openalex.org/W1981723834","https://openalex.org/W2007726556","https://openalex.org/W2012587148","https://openalex.org/W2025752438","https://openalex.org/W2028893450","https://openalex.org/W2035734362","https://openalex.org/W2051620263","https://openalex.org/W2062731992","https://openalex.org/W2087387266","https://openalex.org/W2087617385","https://openalex.org/W2091219706","https://openalex.org/W2104733512","https://openalex.org/W2109008048","https://openalex.org/W2121103318","https://openalex.org/W2125612430","https://openalex.org/W2126909264","https://openalex.org/W2139053308","https://openalex.org/W2147032798","https://openalex.org/W2150930292","https://openalex.org/W2155007355","https://openalex.org/W2156377376","https://openalex.org/W2158782408","https://openalex.org/W2161395589","https://openalex.org/W2167224731","https://openalex.org/W2167562394","https://openalex.org/W2169209873","https://openalex.org/W2281096776","https://openalex.org/W2591436094","https://openalex.org/W2962957031","https://openalex.org/W2964121744","https://openalex.org/W2964161785","https://openalex.org/W4205513846","https://openalex.org/W4298857966","https://openalex.org/W6607786297","https://openalex.org/W6631190155","https://openalex.org/W6633971062","https://openalex.org/W6637242042","https://openalex.org/W6637556697","https://openalex.org/W6637967152","https://openalex.org/W6640174482","https://openalex.org/W6675999342","https://openalex.org/W6678367057","https://openalex.org/W6682849425","https://openalex.org/W6683180588","https://openalex.org/W6684338915","https://openalex.org/W6685331716"],"related_works":["https://openalex.org/W2930076404","https://openalex.org/W2071957557","https://openalex.org/W2596413128","https://openalex.org/W1561646347","https://openalex.org/W2356867392","https://openalex.org/W2782776446","https://openalex.org/W2155948905","https://openalex.org/W2346323633","https://openalex.org/W1971413691","https://openalex.org/W3043170174"],"abstract_inverted_index":{"Model":[0],"predictive":[1],"control":[2,158],"(MPC)":[3],"is":[4,68,107,127,137],"an":[5,84,121],"effective":[6,85],"method":[7,180],"for":[8,52,186],"controlling":[9],"robotic":[10],"systems,":[11],"particularly":[12],"autonomous":[13],"aerial":[14],"vehicles":[15],"such":[16],"as":[17],"quadcopters.":[18],"However,":[19],"application":[20],"of":[21,33,101,163,171,175],"MPC":[22,94,106],"can":[23,37,46,156],"be":[24,38],"computationally":[25],"demanding,":[26],"and":[27,56,167,194],"typically":[28],"requires":[29],"estimating":[30],"the":[31,34,50,99,142,146,152,159,164,172],"state":[32,54,117,197],"system,":[35],"which":[36,136],"challenging":[39],"in":[40,47,98],"complex,":[41],"unstructured":[42],"environments.":[43],"Reinforcement":[44],"learning":[45,97,182],"principle":[48],"forego":[49],"need":[51],"explicit":[53,196],"estimation":[55,198],"acquire":[57],"a":[58,131,169,187],"policy":[59,86,103,155],"that":[60,75],"directly":[61],"maps":[62],"sensor":[63],"readings":[64],"to":[65,70,72,78,92,109,129,139],"actions,":[66],"but":[67],"difficult":[69],"apply":[71],"unstable":[73],"systems":[74],"are":[76],"liable":[77],"fail":[79],"catastrophically":[80],"during":[81],"training":[82,113,123],"before":[83],"has":[87],"been":[88],"found.":[89],"We":[90,177],"propose":[91],"combine":[93],"with":[95],"reinforcement":[96],"framework":[100],"guided":[102],"search,":[104],"where":[105],"used":[108,128],"generate":[110],"data":[111,126],"at":[112,168,199],"time,":[114],"under":[115],"full":[116,165],"observations":[118,144],"provided":[119],"by":[120,181],"instrumented":[122],"environment.":[124],"This":[125],"train":[130],"deep":[132],"neural":[133,153],"network":[134,154],"policy,":[135],"allowed":[138],"access":[140],"only":[141],"raw":[143],"from":[145],"vehicle's":[147],"onboard":[148,192],"sensors.":[149],"After":[150],"training,":[151],"successfully":[157],"robot":[160],"without":[161],"knowledge":[162],"state,":[166],"fraction":[170],"computational":[173],"cost":[174],"MPC.":[176],"evaluate":[178],"our":[179],"obstacle":[183],"avoidance":[184],"policies":[185],"simulated":[188,191],"quadrotor,":[189],"using":[190],"sensors":[193],"no":[195],"test":[200],"time.":[201]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":31},{"year":2023,"cited_by_count":28},{"year":2022,"cited_by_count":47},{"year":2021,"cited_by_count":57},{"year":2020,"cited_by_count":64},{"year":2019,"cited_by_count":92},{"year":2018,"cited_by_count":42},{"year":2017,"cited_by_count":36},{"year":2016,"cited_by_count":8}],"updated_date":"2026-02-27T16:54:17.756197","created_date":"2025-10-10T00:00:00"}
