{"id":"https://openalex.org/W3111928425","doi":"https://doi.org/10.1145/3466618","title":"How to Train Your Quadrotor: A Framework for Consistently Smooth and Responsive Flight Control via Reinforcement Learning","display_name":"How to Train Your Quadrotor: A Framework for Consistently Smooth and Responsive Flight Control via Reinforcement Learning","publication_year":2021,"publication_date":"2021-09-22","ids":{"openalex":"https://openalex.org/W3111928425","doi":"https://doi.org/10.1145/3466618","mag":"3111928425"},"language":"en","primary_location":{"id":"doi:10.1145/3466618","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3466618","pdf_url":null,"source":{"id":"https://openalex.org/S2506189754","display_name":"ACM Transactions on Cyber-Physical Systems","issn_l":"2378-962X","issn":["2378-962X","2378-9638"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Cyber-Physical Systems","raw_type":"journal-article"},"type":"preprint","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2012.06656","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038185726","display_name":"Siddharth Mysore","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Siddharth Mysore","raw_affiliation_strings":["Boston University, Boston, MA","Boston University Boston, MA"],"affiliations":[{"raw_affiliation_string":"Boston University, Boston, MA","institution_ids":[]},{"raw_affiliation_string":"Boston University Boston, MA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058159822","display_name":"Bassel Mabsout","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bassel Mabsout","raw_affiliation_strings":["Boston University, Boston, MA","Boston University Boston, MA"],"affiliations":[{"raw_affiliation_string":"Boston University, Boston, MA","institution_ids":[]},{"raw_affiliation_string":"Boston University Boston, MA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075906727","display_name":"Kate Saenko","orcid":"https://orcid.org/0000-0002-7564-7218"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kate Saenko","raw_affiliation_strings":["Boston University, Boston, MA","Boston University Boston, MA"],"affiliations":[{"raw_affiliation_string":"Boston University, Boston, MA","institution_ids":[]},{"raw_affiliation_string":"Boston University Boston, MA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035353750","display_name":"Renato Mancuso","orcid":"https://orcid.org/0000-0003-3558-5216"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Renato Mancuso","raw_affiliation_strings":["Boston University, Boston, MA","Boston University Boston, MA"],"affiliations":[{"raw_affiliation_string":"Boston University, Boston, MA","institution_ids":[]},{"raw_affiliation_string":"Boston University Boston, MA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5038185726"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01067947,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"5","issue":"4","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8969007730484009},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7503319978713989},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7107385396957397},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.670893669128418},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5577182769775391},{"id":"https://openalex.org/keywords/smoothness","display_name":"Smoothness","score":0.5026187896728516},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3498556613922119},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.08531749248504639}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8969007730484009},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7503319978713989},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7107385396957397},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.670893669128418},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5577182769775391},{"id":"https://openalex.org/C102634674","wikidata":"https://www.wikidata.org/wiki/Q868473","display_name":"Smoothness","level":2,"score":0.5026187896728516},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3498556613922119},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.08531749248504639},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3466618","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3466618","pdf_url":null,"source":{"id":"https://openalex.org/S2506189754","display_name":"ACM Transactions on Cyber-Physical Systems","issn_l":"2378-962X","issn":["2378-962X","2378-9638"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Cyber-Physical Systems","raw_type":"journal-article"},{"id":"mag:3111928425","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2012.06656","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:null:2144/43028","is_oa":false,"landing_page_url":"https://arxiv.org/abs/2012.06656","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2012.06656","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2012.06656","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"mag:3111928425","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2012.06656","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W157375909","https://openalex.org/W1515851193","https://openalex.org/W1757796397","https://openalex.org/W1771410628","https://openalex.org/W1966832848","https://openalex.org/W2066260126","https://openalex.org/W2072916763","https://openalex.org/W2119717200","https://openalex.org/W2151726636","https://openalex.org/W2155027007","https://openalex.org/W2158782408","https://openalex.org/W2167340365","https://openalex.org/W2178948257","https://openalex.org/W2257979135","https://openalex.org/W2271840356","https://openalex.org/W2402144811","https://openalex.org/W2462906003","https://openalex.org/W2733312032","https://openalex.org/W2736601468","https://openalex.org/W2775954438","https://openalex.org/W2793955514","https://openalex.org/W2797527950","https://openalex.org/W2809668646","https://openalex.org/W2890803796","https://openalex.org/W2913859679","https://openalex.org/W2950596486","https://openalex.org/W2962890638","https://openalex.org/W2962902376","https://openalex.org/W2963641140","https://openalex.org/W2963864421","https://openalex.org/W2968983352","https://openalex.org/W2972646073","https://openalex.org/W2982316857","https://openalex.org/W3106462682","https://openalex.org/W3139377883"],"related_works":["https://openalex.org/W3199764406","https://openalex.org/W2985871261","https://openalex.org/W2910219310","https://openalex.org/W3151079898","https://openalex.org/W2952672470","https://openalex.org/W3129896193","https://openalex.org/W1863534978","https://openalex.org/W3203483019","https://openalex.org/W2950622182","https://openalex.org/W2403742485","https://openalex.org/W2894383471","https://openalex.org/W2904263972","https://openalex.org/W2919334316","https://openalex.org/W3101442004","https://openalex.org/W2922299896","https://openalex.org/W2964310273","https://openalex.org/W3045280543","https://openalex.org/W3093541907","https://openalex.org/W3107329888","https://openalex.org/W2968652061"],"abstract_inverted_index":{"We":[0,193,206],"focus":[1],"on":[2,24,196,286],"the":[3,45,100,130,145,186,197,212,265,273],"problem":[4,35,61,290],"of":[5,55,76,95,107,132,144,155,181,188,204,216,267],"reliably":[6],"training":[7,83,126,165],"Reinforcement":[8],"Learning":[9,121],"(RL)":[10],"models":[11],"(agents)":[12],"for":[13,40,123,164,168],"stable":[14],"low-level":[15,169],"control":[16,42,46,70,78],"in":[17,36,69,86,109,149,218,237,278],"embedded":[18],"systems":[19],"and":[20,72,174,233,260],"test":[21],"our":[22,156,268],"methods":[23],"a":[25,59,114,161,179,250,282,287],"high-performance,":[26],"custom-built":[27],"quadrotor":[28,200],"platform.":[29],"A":[30],"common":[31],"but":[32],"often":[33],"under-studied":[34],"developing":[37],"RL":[38,84,110,166,219],"agents":[39,85,134,167,228,243,271],"continuous":[41],"is":[43,97,141,160,223,292],"that":[44,128,184,208,229,291],"policies":[47],"developed":[48,201],"are":[49,79,230,272],"not":[50],"always":[51],"smooth.":[52],"This":[53],"lack":[54],"smoothness":[56,217],"can":[57,67],"be":[58],"major":[60],"when":[62,82,135],"learning":[63],"controllers":[64,276],"as":[65,99,202],"it":[66],"result":[68],"instability":[71,108],"hardware":[73],"failure.":[74],"Issues":[75],"noisy":[77],"further":[80],"accentuated":[81],"simulation":[87,279],"due":[88],"to":[89,137,190,225,246,280],"simulators":[90],"ultimately":[91],"being":[92],"imperfect":[93],"representations":[94],"reality\u2014what":[96],"known":[98],"reality":[101],"gap":[102],".":[103],"To":[104,264],"combat":[105],"issues":[106,215],"agents,":[111],"we":[112],"propose":[113],"systematic":[115],"framework,":[116],"REinforcement-based":[117],"transferable":[118],"Agents":[119],"through":[120],"(RE+AL),":[122],"designing":[124],"simulated":[125],"environments":[127],"preserve":[129],"quality":[131,239],"trained":[133,277],"transferred":[136],"real":[138,191],"platforms.":[139],"RE+AL":[140,172,195,209,222,242,270],"an":[142],"evolution":[143],"Neuroflight":[146,159,176,189],"infrastructure":[147],"detailed":[148],"technical":[150],"reports":[151],"prepared":[152],"by":[153,177],"members":[154],"research":[157],"group.":[158],"state-of-the-art":[162],"framework":[163],"attitude":[170],"control.":[171,296],"improves":[173],"completes":[175],"solving":[178],"number":[180],"important":[182],"limitations":[183],"hindered":[185],"deployment":[187],"hardware.":[192],"benchmark":[194],"NF1":[198],"racing":[199],"part":[203],"Neuroflight.":[205],"demonstrate":[207],"significantly":[210],"mitigates":[211],"previously":[213],"observed":[214],"agents.":[220],"Additionally,":[221],"shown":[224],"consistently":[226],"train":[227],"flight":[231],"capable":[232],"with":[234,254,294],"minimal":[235],"degradation":[236],"controller":[238,285],"upon":[240],"transfer.":[241],"also":[244],"learn":[245],"perform":[247],"better":[248,255],"than":[249],"tuned":[251],"PID":[252,284],"controller,":[253],"tracking":[256],"errors,":[257],"smoother":[258],"control,":[259],"reduced":[261],"power":[262],"consumption.":[263],"best":[266],"knowledge,":[269],"first":[274],"RL-based":[275],"outperform":[281],"well-tuned":[283],"real-world":[288],"controls":[289],"solvable":[293],"classical":[295]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2021-09-27T00:00:00"}
