{"id":"https://openalex.org/W4402354563","doi":"https://doi.org/10.1145/3695464","title":"A novel tree-based method for interpretable reinforcement learning","display_name":"A novel tree-based method for interpretable reinforcement learning","publication_year":2024,"publication_date":"2024-09-09","ids":{"openalex":"https://openalex.org/W4402354563","doi":"https://doi.org/10.1145/3695464"},"language":"en","primary_location":{"id":"doi:10.1145/3695464","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1145/3695464","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009463737","display_name":"Yifan Li","orcid":"https://orcid.org/0000-0001-5347-7775"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yifan Li","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China and Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China and Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050911130","display_name":"Shuhan Qi","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuhan Qi","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China and Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China and Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100328995","display_name":"Xuan Wang","orcid":"https://orcid.org/0000-0002-3512-0649"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuan Wang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China and Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China and Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100417632","display_name":"Jiajia Zhang","orcid":"https://orcid.org/0000-0001-6611-2046"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiajia Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China and Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China and Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062911162","display_name":"Lei Cui","orcid":"https://orcid.org/0009-0000-5814-8698"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Cui","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China and Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China and Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5009463737"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12976686,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.640902578830719},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5908499956130981},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5709580183029175},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.570268452167511},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5385414958000183},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.29540854692459106}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.640902578830719},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5908499956130981},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5709580183029175},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.570268452167511},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5385414958000183},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29540854692459106},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3695464","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1145/3695464","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W2282821441","https://openalex.org/W2739349903","https://openalex.org/W2766447205","https://openalex.org/W2891503716","https://openalex.org/W2891830784","https://openalex.org/W2912083425","https://openalex.org/W2945976633","https://openalex.org/W2958089299","https://openalex.org/W2962858109","https://openalex.org/W2964449086","https://openalex.org/W2979200397","https://openalex.org/W2982316857","https://openalex.org/W2996061341","https://openalex.org/W3042368254","https://openalex.org/W3045825034","https://openalex.org/W3092083846","https://openalex.org/W3126813721","https://openalex.org/W3154435685","https://openalex.org/W3157950068","https://openalex.org/W3164005523","https://openalex.org/W3173672478","https://openalex.org/W3182221256","https://openalex.org/W3195438473","https://openalex.org/W3198804470","https://openalex.org/W4283322404","https://openalex.org/W4283382154","https://openalex.org/W4283699829","https://openalex.org/W4310466989","https://openalex.org/W4312274013","https://openalex.org/W4368232818","https://openalex.org/W4385767379","https://openalex.org/W4386179745","https://openalex.org/W4388642305"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,147],"learning":[2,16],"(DRL)":[3],"has":[4],"garnered":[5],"remarkable":[6],"success":[7],"across":[8,164],"various":[9],"domains,":[10],"propelled":[11],"by":[12,156],"advancements":[13],"in":[14,32,50,115,179],"deep":[15],"(DL)":[17],"technologies.":[18],"However,":[19],"the":[20,28,83,88,106,112,125,130,169,175,181],"opacity":[21],"of":[22,30,171,177,185],"DL":[23],"presents":[24],"significant":[25],"challenges,":[26],"limiting":[27],"application":[29],"DRL":[31],"critical":[33],"systems.":[34],"In":[35],"response,":[36],"decision":[37],"tree":[38],"(DT)-based":[39],"methods,":[40],"known":[41],"for":[42,54,73,145],"their":[43],"transparent":[44],"decision-making":[45,55],"mechanisms,":[46],"have":[47,140,167],"shown":[48],"promise":[49],"making":[51],"interpretable":[52,146],"policies":[53,66],"problems.":[56],"Existing":[57],"methods":[58],"often":[59],"employ":[60],"differential":[61,85],"DTs":[62,72,86],"to":[63,70,80,123,153],"model":[64],"RL":[65],"and":[67,87,135,183],"discretize":[68],"them":[69],"conventional":[71],"higher":[74],"interpretability.":[75],"Yet,":[76],"this":[77,93,172],"method":[78],"leads":[79],"discrepancies":[81],"between":[82,132],"trained":[84,133],"discretized":[89,136],"DTs.":[90],"To":[91],"address":[92],"issue,":[94],"we":[95,139],"introduce":[96],"Generative":[97],"Consistent":[98],"Trees":[99],"(GCTs),":[100],"a":[101,120],"novel":[102],"solution":[103],"that":[104],"circumvents":[105],"information":[107],"loss":[108],"typically":[109],"associated":[110],"with":[111],"argmax":[113],"operation":[114],"prior":[116],"research.":[117],"By":[118],"implementing":[119],"reparameterization":[121],"technique":[122],"approximate":[124],"categorical":[126],"distribution,":[127],"GCTs":[128,134,155,178],"ensure":[129],"consistencies":[131],"counterparts.":[137],"Moreover,":[138],"developed":[141],"an":[142],"imitation-learning-based":[143],"framework":[144,150],"learning.":[148],"This":[149],"is":[151],"designed":[152],"train":[154],"efficiently":[157],"mimicking":[158],"expert":[159],"policies.":[160],"Our":[161],"extensive":[162],"experiments":[163],"multiple":[165],"environments":[166],"validated":[168],"effectiveness":[170],"approach,":[173],"highlighting":[174],"potential":[176],"enhancing":[180],"interpretability":[182],"applicability":[184],"DRL.":[186]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
