{"id":"https://openalex.org/W4386505803","doi":"https://doi.org/10.1145/3587716.3587798","title":"Interpretable Reinforcement Learning of Behavior Trees","display_name":"Interpretable Reinforcement Learning of Behavior Trees","publication_year":2023,"publication_date":"2023-02-17","ids":{"openalex":"https://openalex.org/W4386505803","doi":"https://doi.org/10.1145/3587716.3587798"},"language":"en","primary_location":{"id":"doi:10.1145/3587716.3587798","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3587716.3587798","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 15th International Conference on Machine Learning and Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090228382","display_name":"Chenjing Zhao","orcid":"https://orcid.org/0000-0002-8026-9848"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chenjing Zhao","raw_affiliation_strings":["Defense Innovation Institute, China"],"raw_orcid":"https://orcid.org/0000-0002-8026-9848","affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015335846","display_name":"Chuanshuai Deng","orcid":"https://orcid.org/0009-0004-8930-2183"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chuanshuai Deng","raw_affiliation_strings":["Defense Innovation Institute, China"],"raw_orcid":"https://orcid.org/0009-0004-8930-2183","affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019941969","display_name":"Zhouquan Liu","orcid":"https://orcid.org/0009-0008-3691-9074"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhenghui Liu","raw_affiliation_strings":["Defense Innovation Institute, China"],"raw_orcid":"https://orcid.org/0009-0008-3691-9074","affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063283510","display_name":"Jiexin Zhang","orcid":"https://orcid.org/0009-0003-5488-9386"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiexin Zhang","raw_affiliation_strings":["Defense Innovation Institute, China"],"raw_orcid":"https://orcid.org/0009-0003-5488-9386","affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101706134","display_name":"Yunlong Wu","orcid":"https://orcid.org/0000-0002-6911-954X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yunlong Wu","raw_affiliation_strings":["Defense Innovation Institute, China"],"raw_orcid":"https://orcid.org/0000-0002-6911-954X","affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027361201","display_name":"Yanzhen Wang","orcid":"https://orcid.org/0000-0002-3104-4845"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yanzhen Wang","raw_affiliation_strings":["Defense Innovation Institute, China"],"raw_orcid":"https://orcid.org/0000-0002-3104-4845","affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102835517","display_name":"Xiaodong Yi","orcid":"https://orcid.org/0000-0003-2279-5417"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaodong Yi","raw_affiliation_strings":["Defense Innovation Institute, China"],"raw_orcid":"https://orcid.org/0000-0003-2279-5417","affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5090228382"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5112,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.71666638,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"492","last_page":"499"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9634000062942505,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.9777568578720093},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8828197121620178},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7460076212882996},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7378302216529846},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5729482173919678},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.49419930577278137},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.27755290269851685}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.9777568578720093},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8828197121620178},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7460076212882996},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7378302216529846},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5729482173919678},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.49419930577278137},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.27755290269851685}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3587716.3587798","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3587716.3587798","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 15th International Conference on Machine Learning and Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7796496934","display_name":null,"funder_award_id":"61906212, 62102441, 9194830","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1503370304","https://openalex.org/W1524728167","https://openalex.org/W1576818901","https://openalex.org/W1608608607","https://openalex.org/W1963549352","https://openalex.org/W1994005705","https://openalex.org/W2007506196","https://openalex.org/W2163299954","https://openalex.org/W2261683202","https://openalex.org/W2521617762","https://openalex.org/W2550336733","https://openalex.org/W2567075875","https://openalex.org/W2898794805","https://openalex.org/W2909728541","https://openalex.org/W2954804306","https://openalex.org/W3145123113","https://openalex.org/W4233104380","https://openalex.org/W4244566287","https://openalex.org/W4288289200"],"related_works":["https://openalex.org/W1986582023","https://openalex.org/W2883749686","https://openalex.org/W4233452137","https://openalex.org/W2961085424","https://openalex.org/W4254857216","https://openalex.org/W2966829450","https://openalex.org/W4315864862","https://openalex.org/W4231626925","https://openalex.org/W2962829499","https://openalex.org/W4306674287"],"abstract_inverted_index":{"The":[0],"interpretability":[1,112],"of":[2,10,69,81,107,113,124,153],"reinforcement":[3],"learning":[4],"(RL)":[5],"algorithms":[6,103],"has":[7],"become":[8],"one":[9],"the":[11,32,48,64,78,95,105,111,117,120,151],"significant":[12],"challenges":[13],"for":[14],"artificial":[15],"intelligence":[16],"(AI)":[17],"researchers.":[18],"Behavior":[19],"Trees":[20],"(BTs)":[21],"have":[22,44],"enabled":[23],"developers":[24],"to":[25,46,51,54,63,109,130,137],"design":[26,80],"AI":[27],"policies":[28,57,96,133],"visually":[29],"and":[30,37,77,100,122,139],"comprehend":[31,138],"agent\u2019s":[33],"behaviors":[34],"in":[35,104,141,144],"robotics":[36],"computer":[38],"games.":[39],"Combining":[40],"their":[41],"strengths,":[42],"researchers":[43],"proposed":[45],"utilize":[47],"RL":[49],"algorithm":[50],"generate":[52],"BTs":[53,108,125],"present":[55,88],"learned":[56],"automatically.":[58],"Existing":[59],"methods":[60,91],"are":[61,135],"devoted":[62],"incremental":[65],"generation":[66,90],"or":[67],"modification":[68],"pre-designed":[70],"BTs.":[71,83],"These":[72],"efforts":[73],"necessitate":[74],"specialized":[75],"knowledge":[76],"manual":[79],"initial":[82],"In":[84],"this":[85],"paper,":[86],"we":[87],"intelligent":[89],"that":[92,134],"directly":[93],"represent":[94],"generated":[97],"by":[98],"Q-learning":[99],"its":[101],"derived":[102],"form":[106],"enhance":[110],"RL.":[114],"We":[115],"investigate":[116],"tradeoff":[118],"between":[119],"size":[121],"performance":[123],"while":[126],"attaining":[127],"interpretability,":[128],"intending":[129],"obtain":[131],"balanced":[132],"easy":[136],"good":[140],"performance.":[142],"Evaluations":[143],"several":[145],"classic":[146],"OpenAI":[147],"Gym":[148],"environments":[149],"validate":[150],"effectiveness":[152],"our":[154],"methods.":[155]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
