{"id":"https://openalex.org/W3134875704","doi":"https://doi.org/10.1109/tpami.2021.3103132","title":"Continuous Action Reinforcement Learning From a Mixture of Interpretable Experts","display_name":"Continuous Action Reinforcement Learning From a Mixture of Interpretable Experts","publication_year":2021,"publication_date":"2021-08-10","ids":{"openalex":"https://openalex.org/W3134875704","doi":"https://doi.org/10.1109/tpami.2021.3103132","mag":"3134875704","pmid":"https://pubmed.ncbi.nlm.nih.gov/34375280"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2021.3103132","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2021.3103132","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2006.05911","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075701789","display_name":"Riad Akrour","orcid":"https://orcid.org/0000-0002-8735-6960"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Riad Akrour","raw_affiliation_strings":["Aalto University, Espoo, Finland"],"affiliations":[{"raw_affiliation_string":"Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001304610","display_name":"Davide Tateo","orcid":"https://orcid.org/0000-0002-7193-923X"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Davide Tateo","raw_affiliation_strings":["TU Darmstadt, Darmstadt, Germany"],"affiliations":[{"raw_affiliation_string":"TU Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071367253","display_name":"Jan Peters","orcid":"https://orcid.org/0000-0002-5266-8091"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jan Peters","raw_affiliation_strings":["TU Darmstadt, Darmstadt, Germany"],"affiliations":[{"raw_affiliation_string":"TU Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5075701789"],"corresponding_institution_ids":["https://openalex.org/I9927081"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03254448,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"44","issue":"10","first_page":"6795","last_page":"6806"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8756662607192993},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7689318060874939},{"id":"https://openalex.org/keywords/black-box","display_name":"Black box","score":0.7554687857627869},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6799653172492981},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5772104859352112},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.562862753868103},{"id":"https://openalex.org/keywords/differentiable-function","display_name":"Differentiable function","score":0.5490657091140747},{"id":"https://openalex.org/keywords/action-selection","display_name":"Action selection","score":0.5357479453086853},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5157268643379211},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5129191875457764},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5078433156013489},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.4872218668460846},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.46213454008102417},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.42708471417427063},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.14525678753852844},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10035240650177002}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8756662607192993},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7689318060874939},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.7554687857627869},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6799653172492981},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5772104859352112},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.562862753868103},{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.5490657091140747},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.5357479453086853},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5157268643379211},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5129191875457764},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5078433156013489},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.4872218668460846},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.46213454008102417},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42708471417427063},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.14525678753852844},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10035240650177002},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":6,"locations":[{"id":"doi:10.1109/tpami.2021.3103132","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2021.3103132","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:34375280","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34375280","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null},{"id":"pmh:oai:arXiv.org:2006.05911","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2006.05911","pdf_url":"https://arxiv.org/pdf/2006.05911","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3134875704","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2006.05911","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:re.public.polimi.it:11311/1280845","is_oa":false,"landing_page_url":"https://hdl.handle.net/11311/1280845","pdf_url":null,"source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"},{"id":"doi:10.48550/arxiv.2006.05911","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2006.05911","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2006.05911","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2006.05911","pdf_url":"https://arxiv.org/pdf/2006.05911","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.7699999809265137,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3134875704.pdf","grobid_xml":"https://content.openalex.org/works/W3134875704.grobid-xml"},"referenced_works_count":118,"referenced_works":["https://openalex.org/W165672800","https://openalex.org/W1508965793","https://openalex.org/W1592847719","https://openalex.org/W1600813180","https://openalex.org/W1754881896","https://openalex.org/W1771410628","https://openalex.org/W1839697241","https://openalex.org/W2066334462","https://openalex.org/W2073384958","https://openalex.org/W2104209987","https://openalex.org/W2108535023","https://openalex.org/W2109910161","https://openalex.org/W2113882472","https://openalex.org/W2118022153","https://openalex.org/W2118556122","https://openalex.org/W2119579400","https://openalex.org/W2120346334","https://openalex.org/W2121517924","https://openalex.org/W2121863487","https://openalex.org/W2124477018","https://openalex.org/W2130005627","https://openalex.org/W2130801532","https://openalex.org/W2133853511","https://openalex.org/W2142641780","https://openalex.org/W2145339207","https://openalex.org/W2150480892","https://openalex.org/W2155027007","https://openalex.org/W2155968351","https://openalex.org/W2158548602","https://openalex.org/W2158782408","https://openalex.org/W2160371091","https://openalex.org/W2164878629","https://openalex.org/W2172968643","https://openalex.org/W2173248099","https://openalex.org/W2257979135","https://openalex.org/W2282821441","https://openalex.org/W2312609093","https://openalex.org/W2344023930","https://openalex.org/W2397240726","https://openalex.org/W2468354762","https://openalex.org/W2498991332","https://openalex.org/W2594336441","https://openalex.org/W2736601468","https://openalex.org/W2746553466","https://openalex.org/W2798766386","https://openalex.org/W2803281228","https://openalex.org/W2806098286","https://openalex.org/W2899270112","https://openalex.org/W2899685447","https://openalex.org/W2912565176","https://openalex.org/W2920215304","https://openalex.org/W2922057017","https://openalex.org/W2946146170","https://openalex.org/W2946148574","https://openalex.org/W2949267040","https://openalex.org/W2949608212","https://openalex.org/W2962749646","https://openalex.org/W2963184621","https://openalex.org/W2963286043","https://openalex.org/W2963319332","https://openalex.org/W2963335963","https://openalex.org/W2963403868","https://openalex.org/W2963438456","https://openalex.org/W2963615220","https://openalex.org/W2963616477","https://openalex.org/W2964043796","https://openalex.org/W2964118262","https://openalex.org/W2964227312","https://openalex.org/W2964231903","https://openalex.org/W2973229164","https://openalex.org/W2998004401","https://openalex.org/W2998670262","https://openalex.org/W3003712948","https://openalex.org/W3007111659","https://openalex.org/W3038620866","https://openalex.org/W3090004508","https://openalex.org/W3091897465","https://openalex.org/W4211007335","https://openalex.org/W4214717370","https://openalex.org/W4242606736","https://openalex.org/W4299401133","https://openalex.org/W6606822168","https://openalex.org/W6627932998","https://openalex.org/W6637806891","https://openalex.org/W6638018090","https://openalex.org/W6638827766","https://openalex.org/W6676557315","https://openalex.org/W6676885637","https://openalex.org/W6677737365","https://openalex.org/W6678030633","https://openalex.org/W6679257226","https://openalex.org/W6680724558","https://openalex.org/W6682385587","https://openalex.org/W6683204974","https://openalex.org/W6683443546","https://openalex.org/W6683821272","https://openalex.org/W6691861496","https://openalex.org/W6692846177","https://openalex.org/W6693502484","https://openalex.org/W6705013792","https://openalex.org/W6712173889","https://openalex.org/W6719911377","https://openalex.org/W6734023089","https://openalex.org/W6734129231","https://openalex.org/W6736368053","https://openalex.org/W6739901393","https://openalex.org/W6740801417","https://openalex.org/W6741002519","https://openalex.org/W6748603076","https://openalex.org/W6749992176","https://openalex.org/W6751437432","https://openalex.org/W6752089545","https://openalex.org/W6752216738","https://openalex.org/W6755697580","https://openalex.org/W6759871227","https://openalex.org/W6762939852","https://openalex.org/W6773656828","https://openalex.org/W7011603464"],"related_works":["https://openalex.org/W3192708540","https://openalex.org/W3035521307","https://openalex.org/W3037429136","https://openalex.org/W3213484318","https://openalex.org/W3136683507","https://openalex.org/W76760840","https://openalex.org/W3034724428","https://openalex.org/W1925600676","https://openalex.org/W3008076766","https://openalex.org/W2132713246","https://openalex.org/W3025362081","https://openalex.org/W2907704766","https://openalex.org/W3092485320","https://openalex.org/W2963794592","https://openalex.org/W2103064945","https://openalex.org/W3037443774","https://openalex.org/W276460289","https://openalex.org/W2946824041","https://openalex.org/W2749952662","https://openalex.org/W2261891975"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"(RL)":[2],"has":[3],"demonstrated":[4],"its":[5,73],"ability":[6],"to":[7,32,50,81,103,106,114,120,136,180],"solve":[8],"high":[9],"dimensional":[10],"tasks":[11],"by":[12,23,141],"leveraging":[13],"non-linear":[14],"function":[15,70],"approximators.":[16],"However,":[17],"these":[18],"successes":[19],"are":[20,177],"mostly":[21],"achieved":[22],"'black-box'":[24,43],"policies":[25,54,158,175],"in":[26,59],"simulated":[27],"domains.":[28],"When":[29],"deploying":[30],"RL":[31,163],"the":[33,39,52,79,122,133,138,166],"real":[34],"world,":[35],"several":[36],"concerns":[37],"regarding":[38],"use":[40],"of":[41,93,132,168],"a":[42,62,68,83,91,99,104,107],"policy":[44,63,80],"might":[45],"be":[46],"raised.":[47],"In":[48],"order":[49],"make":[51],"learned":[53],"more":[55,178],"transparent,":[56],"we":[57,149],"propose":[58],"this":[60,142],"paper":[61,134],"iteration":[64],"scheme":[65],"that":[66,151,176],"retains":[67],"complex":[69],"approximator":[71],"for":[72],"internal":[74],"value":[75],"predictions":[76],"but":[77,173],"constrains":[78],"have":[82],"concise,":[84],"hierarchical,":[85],"and":[86],"human-readable":[87],"structure,":[88],"based":[89,171],"on":[90,159],"mixture":[92],"interpretable":[94,118],"experts.":[95],"Each":[96],"expert":[97],"selects":[98],"primitive":[100],"action":[101,161],"according":[102],"distance":[105],"prototypical":[108,123,144],"state.":[109],"A":[110],"key":[111],"design":[112],"decision":[113],"keep":[115],"such":[116],"experts":[117],"is":[119,135],"select":[121],"states":[124],"from":[125],"trajectory":[126],"data.":[127],"The":[128],"main":[129],"technical":[130],"contribution":[131],"address":[137],"challenges":[139],"introduced":[140],"non-differentiable":[143],"state":[145],"selection":[146],"procedure.":[147],"Experimentally,":[148],"show":[150],"our":[152],"proposed":[153],"algorithm":[154],"can":[155],"learn":[156],"compelling":[157],"continuous":[160],"deep":[162],"benchmarks,":[164],"matching":[165],"performance":[167],"neural":[169,184],"network":[170,185],"policies,":[172],"returning":[174],"amenable":[179],"human":[181],"inspection":[182],"than":[183],"or":[186],"linear-in-feature":[187],"policies.":[188]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
