{"id":"https://openalex.org/W4389667323","doi":"https://doi.org/10.1109/iros55552.2023.10342319","title":"Improving the Performance of Backward Chained Behavior Trees that use Reinforcement Learning","display_name":"Improving the Performance of Backward Chained Behavior Trees that use Reinforcement Learning","publication_year":2023,"publication_date":"2023-10-01","ids":{"openalex":"https://openalex.org/W4389667323","doi":"https://doi.org/10.1109/iros55552.2023.10342319"},"language":"en","primary_location":{"id":"doi:10.1109/iros55552.2023.10342319","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/iros55552.2023.10342319","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001360476","display_name":"Mart Karta\u0161ev","orcid":"https://orcid.org/0000-0001-8264-611X"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Mart Kartasev","raw_affiliation_strings":["School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH),Robotics, Perception and Learning Lab.,Sweden","Robotics, Perception and Learning Lab., School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH), Sweden"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH),Robotics, Perception and Learning Lab.,Sweden","institution_ids":["https://openalex.org/I86987016"]},{"raw_affiliation_string":"Robotics, Perception and Learning Lab., School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH), Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015168455","display_name":"Justin Sal\u00e9r","orcid":null},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Justin Sal\u00e9r","raw_affiliation_strings":["School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH),Robotics, Perception and Learning Lab.,Sweden","Robotics, Perception and Learning Lab., School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH), Sweden"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH),Robotics, Perception and Learning Lab.,Sweden","institution_ids":["https://openalex.org/I86987016"]},{"raw_affiliation_string":"Robotics, Perception and Learning Lab., School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH), Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070754732","display_name":"Petter \u00d6gren","orcid":"https://orcid.org/0000-0002-7714-928X"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Petter \u00d6gren","raw_affiliation_strings":["School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH),Robotics, Perception and Learning Lab.,Sweden","Robotics, Perception and Learning Lab., School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH), Sweden"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH),Robotics, Perception and Learning Lab.,Sweden","institution_ids":["https://openalex.org/I86987016"]},{"raw_affiliation_string":"Robotics, Perception and Learning Lab., School of Electrical Engineering and Computer Science, Royal Institute of Technology (KTH), Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3263,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.67020983,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1572","last_page":"1579"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9375,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chaining","display_name":"Chaining","score":0.8713546395301819},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.807616114616394},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.792465329170227},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6584265232086182},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.6541298031806946},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6435325145721436},{"id":"https://openalex.org/keywords/mathematical-proof","display_name":"Mathematical proof","score":0.5515239238739014},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5425553321838379},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5037710070610046},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.44659748673439026},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.32383978366851807},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24739018082618713},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10098832845687866}],"concepts":[{"id":"https://openalex.org/C49020025","wikidata":"https://www.wikidata.org/wiki/Q1059099","display_name":"Chaining","level":2,"score":0.8713546395301819},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.807616114616394},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.792465329170227},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6584265232086182},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.6541298031806946},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6435325145721436},{"id":"https://openalex.org/C108710211","wikidata":"https://www.wikidata.org/wiki/Q11538","display_name":"Mathematical proof","level":2,"score":0.5515239238739014},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5425553321838379},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5037710070610046},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.44659748673439026},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.32383978366851807},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24739018082618713},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10098832845687866},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C542102704","wikidata":"https://www.wikidata.org/wiki/Q183257","display_name":"Psychotherapist","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros55552.2023.10342319","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/iros55552.2023.10342319","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1524728167","https://openalex.org/W2007506196","https://openalex.org/W2261683202","https://openalex.org/W2294712518","https://openalex.org/W2328646735","https://openalex.org/W2465895732","https://openalex.org/W2521617762","https://openalex.org/W2548961765","https://openalex.org/W2736601468","https://openalex.org/W2766642606","https://openalex.org/W2784013554","https://openalex.org/W2893725421","https://openalex.org/W2982316857","https://openalex.org/W2982619091","https://openalex.org/W3024044737","https://openalex.org/W3044207482","https://openalex.org/W3082488836","https://openalex.org/W3100516791","https://openalex.org/W3105836502","https://openalex.org/W3145123113","https://openalex.org/W3203483019","https://openalex.org/W4285145746","https://openalex.org/W4298857966","https://openalex.org/W4378945285","https://openalex.org/W4393241288","https://openalex.org/W6604328405","https://openalex.org/W6692846177","https://openalex.org/W6719354989","https://openalex.org/W6721634521","https://openalex.org/W6741002519","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W2808944898","https://openalex.org/W1995270367","https://openalex.org/W4387627836","https://openalex.org/W2767696758","https://openalex.org/W2107793209","https://openalex.org/W3196207352","https://openalex.org/W2951724202","https://openalex.org/W3084261076","https://openalex.org/W2041767423","https://openalex.org/W4387820098"],"abstract_inverted_index":{"In":[0],"this":[1,99],"paper":[2,100],"we":[3],"show":[4],"how":[5,187,196],"to":[6,79,88,102,119,143,146,148,158,206],"improve":[7,103],"the":[8,47,83,111,121,161,178,193],"performance":[9,104,183],"of":[10,31,49,76,85,91,98,105,201],"backward":[11,63,106],"chained":[12,64,107],"behavior":[13],"trees":[14],"(BTs)":[15],"that":[16,51,136,165],"include":[17],"policies":[18,34],"trained":[19],"with":[20,54],"reinforcement":[21],"learning":[22],"(RL).":[23],"BTs":[24,50,108],"represent":[25],"a":[26,43,59,74,89,115,156,215],"hierarchical":[27],"and":[28,195],"modular":[29],"way":[30,157],"combining":[32],"control":[33,38],"into":[35],"higher":[36],"level":[37],"policies.":[39],"Backward":[40],"chaining":[41],"is":[42,101,212],"design":[44],"principle":[45],"for":[46,71,82,124],"construction":[48],"combines":[52],"reactivity":[53],"goal":[55,93],"directed":[56],"actions":[57],"in":[58,114,141,199,214],"structured":[60],"way.":[61],"The":[62,95,181,209],"structure":[65],"has":[66],"also":[67,175],"enabled":[68],"convergence":[69,84,117],"proofs":[70],"BTs,":[72],"identifying":[73],"set":[75,90,159],"local":[77],"conditions":[78,112,134],"be":[80,139],"satisfied":[81],"all":[86],"trajectories":[87],"desired":[92],"states.":[94],"key":[96],"idea":[97],"by":[109],"using":[110],"identified":[113,130,179],"theoretical":[116],"proof":[118],"configure":[120],"RL":[122,162],"problems":[123],"individual":[125],"controllers.":[126],"Specifically,":[127],"previous":[128],"analysis":[129],"so-called":[131],"active":[132],"constraint":[133],"(ACCs),":[135],"should":[137],"not":[138,168],"violated":[140],"order":[142],"avoid":[144,176],"having":[145],"return":[147],"work":[149],"on":[150,186],"previously":[151],"achieved":[152],"subgoals.":[153],"We":[154],"propose":[155],"up":[160],"problems,":[163],"such":[164],"they":[166],"do":[167],"only":[169],"achieve":[170],"each":[171],"immediate":[172],"subgoal,":[173],"but":[174],"violating":[177],"ACCs.":[180],"resulting":[182],"improvement":[184],"depends":[185],"often":[188],"ACC":[189],"violations":[190],"occurred":[191],"before":[192],"change,":[194],"much":[197],"effort,":[198],"terms":[200],"execution":[202],"time,":[203],"was":[204],"needed":[205],"re-achieve":[207],"them.":[208],"proposed":[210],"approach":[211],"illustrated":[213],"dynamic":[216],"simulation":[217],"environment.":[218]},"counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
