{"id":"https://openalex.org/W2995074243","doi":"https://doi.org/10.1126/scirobotics.aay6276","title":"A formal methods approach to interpretable reinforcement learning for robotic planning","display_name":"A formal methods approach to interpretable reinforcement learning for robotic planning","publication_year":2019,"publication_date":"2019-12-18","ids":{"openalex":"https://openalex.org/W2995074243","doi":"https://doi.org/10.1126/scirobotics.aay6276","mag":"2995074243","pmid":"https://pubmed.ncbi.nlm.nih.gov/33137718"},"language":"en","primary_location":{"id":"doi:10.1126/scirobotics.aay6276","is_oa":false,"landing_page_url":"https://doi.org/10.1126/scirobotics.aay6276","pdf_url":null,"source":{"id":"https://openalex.org/S4210213233","display_name":"Science Robotics","issn_l":"2470-9476","issn":["2470-9476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315823","host_organization_name":"American Association for the Advancement of Science","host_organization_lineage":["https://openalex.org/P4310315823"],"host_organization_lineage_names":["American Association for the Advancement of Science"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Science Robotics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100654890","display_name":"Xiao Li","orcid":"https://orcid.org/0000-0003-0756-7483"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiao Li","raw_affiliation_strings":["Department of Mechanical Engineering, Boston University, Boston, MA, USA"],"raw_orcid":"https://orcid.org/0000-0003-0756-7483","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, Boston University, Boston, MA, USA","institution_ids":["https://openalex.org/I111088046"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007227323","display_name":"Zachary Serlin","orcid":"https://orcid.org/0000-0002-0975-2204"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zachary Serlin","raw_affiliation_strings":["Department of Mechanical Engineering, Boston University, Boston, MA, USA"],"raw_orcid":"https://orcid.org/0000-0002-0975-2204","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, Boston University, Boston, MA, USA","institution_ids":["https://openalex.org/I111088046"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059211235","display_name":"Guang Yang","orcid":"https://orcid.org/0000-0003-1181-4862"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guang Yang","raw_affiliation_strings":["Division of Systems Engineering, Boston University, Boston, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Division of Systems Engineering, Boston University, Boston, MA, USA","institution_ids":["https://openalex.org/I111088046"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086742095","display_name":"C\u0103lin Belta","orcid":"https://orcid.org/0000-0002-7141-2657"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Calin Belta","raw_affiliation_strings":["Department of Mechanical Engineering, Boston University, Boston, MA, USA","Division of Systems Engineering, Boston University, Boston, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, Boston University, Boston, MA, USA","institution_ids":["https://openalex.org/I111088046"]},{"raw_affiliation_string":"Division of Systems Engineering, Boston University, Boston, MA, USA","institution_ids":["https://openalex.org/I111088046"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5100654890"],"corresponding_institution_ids":["https://openalex.org/I111088046"],"apc_list":null,"apc_paid":null,"fwci":6.5083,"has_fulltext":false,"cited_by_count":107,"citation_normalized_percentile":{"value":0.97288566,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"4","issue":"37","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9869999885559082,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7765282392501831},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5576385259628296},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5544386506080627},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5449056625366211},{"id":"https://openalex.org/keywords/formal-language","display_name":"Formal language","score":0.5311699509620667},{"id":"https://openalex.org/keywords/formal-learning","display_name":"Formal learning","score":0.4994771480560303},{"id":"https://openalex.org/keywords/formal-methods","display_name":"Formal methods","score":0.4972980320453644},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.20163431763648987},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.19595640897750854},{"id":"https://openalex.org/keywords/mathematics-education","display_name":"Mathematics education","score":0.12532907724380493},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.061386823654174805}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7765282392501831},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5576385259628296},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5544386506080627},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5449056625366211},{"id":"https://openalex.org/C146072743","wikidata":"https://www.wikidata.org/wiki/Q192161","display_name":"Formal language","level":2,"score":0.5311699509620667},{"id":"https://openalex.org/C2776181820","wikidata":"https://www.wikidata.org/wiki/Q8434","display_name":"Formal learning","level":2,"score":0.4994771480560303},{"id":"https://openalex.org/C75606506","wikidata":"https://www.wikidata.org/wiki/Q1049183","display_name":"Formal methods","level":2,"score":0.4972980320453644},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.20163431763648987},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.19595640897750854},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.12532907724380493},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.061386823654174805}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1126/scirobotics.aay6276","is_oa":false,"landing_page_url":"https://doi.org/10.1126/scirobotics.aay6276","pdf_url":null,"source":{"id":"https://openalex.org/S4210213233","display_name":"Science Robotics","issn_l":"2470-9476","issn":["2470-9476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315823","host_organization_name":"American Association for the Advancement of Science","host_organization_lineage":["https://openalex.org/P4310315823"],"host_organization_lineage_names":["American Association for the Advancement of Science"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Science Robotics","raw_type":"journal-article"},{"id":"pmid:33137718","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33137718","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Science robotics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3526003910","display_name":null,"funder_award_id":"CMMI-1400167","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4048345806","display_name":null,"funder_award_id":"IIS-1723995","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1547304883","https://openalex.org/W1562936359","https://openalex.org/W1777239053","https://openalex.org/W1980569135","https://openalex.org/W2082511574","https://openalex.org/W2097942312","https://openalex.org/W2130290065","https://openalex.org/W2133180954","https://openalex.org/W2145339207","https://openalex.org/W2155027007","https://openalex.org/W2290772557","https://openalex.org/W2524638160","https://openalex.org/W2567705466","https://openalex.org/W2593845678","https://openalex.org/W2734058336","https://openalex.org/W2735010720","https://openalex.org/W2741248519","https://openalex.org/W2889711700","https://openalex.org/W2891503716","https://openalex.org/W2895196950","https://openalex.org/W2913775679","https://openalex.org/W2931553127","https://openalex.org/W2937276195","https://openalex.org/W2963575966","https://openalex.org/W2964514675","https://openalex.org/W2966183138","https://openalex.org/W2966735560","https://openalex.org/W3099352109","https://openalex.org/W4214717370"],"related_works":["https://openalex.org/W3022086173","https://openalex.org/W2323519609","https://openalex.org/W1546523042","https://openalex.org/W2110102808","https://openalex.org/W2240208030","https://openalex.org/W2138398927","https://openalex.org/W2165968349","https://openalex.org/W1921676103","https://openalex.org/W4400800542","https://openalex.org/W2672127762"],"abstract_inverted_index":{"Growing":[0],"interest":[1],"in":[2,155],"reinforcement":[3,56,128],"learning":[4,46,57,129],"approaches":[5],"to":[6,42,55,91,161],"robotic":[7,122,152],"planning":[8],"and":[9,15,37,94,124,146],"control":[10,24,133],"raises":[11],"concerns":[12],"of":[13,17,99,104,110],"predictability":[14],"safety":[16,102],"robot":[18],"behaviors":[19],"realized":[20],"solely":[21],"through":[22],"learned":[23],"policies.":[25],"In":[26],"addition,":[27],"formally":[28],"defining":[29],"reward":[30,79],"functions":[31],"for":[32,121,150],"complex":[33],"tasks":[34,123],"is":[35,140],"challenging,":[36],"faulty":[38],"rewards":[39],"are":[40,114],"prone":[41],"exploitation":[43],"by":[44],"the":[45,78,86,92,97,100,105,137],"agent.":[47],"Here,":[48],"we":[49,143],"propose":[50],"a":[51,61,72,115,151],"formal":[52,62],"methods":[53],"approach":[54],"that":[58,65],"(i)":[59],"provides":[60],"specification":[63],"language":[64],"integrates":[66],"high-level,":[67],"rich,":[68],"task":[69],"specifications":[70],"with":[71],"priori,":[73],"domain-specific":[74],"knowledge;":[75],"(ii)":[76],"makes":[77],"generation":[80,88],"process":[81,89],"easily":[82],"interpretable;":[83],"(iii)":[84],"guides":[85],"policy":[87],"according":[90],"specification;":[93],"(iv)":[95],"guarantees":[96],"satisfaction":[98],"(critical)":[101],"component":[103],"specification.":[106],"The":[107],"main":[108],"ingredients":[109],"our":[111],"computational":[112],"framework":[113,139],"predicate":[116],"temporal":[117],"logic":[118],"specifically":[119],"tailored":[120],"an":[125],"automaton-guided,":[126],"safe":[127],"algorithm":[130],"based":[131],"on":[132],"barrier":[134],"functions.":[135],"Although":[136],"proposed":[138],"quite":[141],"general,":[142],"motivate":[144],"it":[145,148],"illustrate":[147],"experimentally":[149],"cooking":[153],"task,":[154],"which":[156],"two":[157],"manipulators":[158],"worked":[159],"together":[160],"make":[162],"hot":[163],"dogs.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":23},{"year":2023,"cited_by_count":19},{"year":2022,"cited_by_count":16},{"year":2021,"cited_by_count":18},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-27T08:28:00.272161","created_date":"2025-10-10T00:00:00"}
