{"id":"https://openalex.org/W3090729135","doi":"https://doi.org/10.1109/ijcnn48605.2020.9207648","title":"Improved Policy Extraction via Online Q-Value Distillation","display_name":"Improved Policy Extraction via Online Q-Value Distillation","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3090729135","doi":"https://doi.org/10.1109/ijcnn48605.2020.9207648","mag":"3090729135"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn48605.2020.9207648","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9207648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064704015","display_name":"Aman Jhunjhunwala","orcid":null},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Aman Jhunjhunwala","raw_affiliation_strings":["Cheriton School of Computer Science, University of Waterloo, Waterloo, Canada"],"affiliations":[{"raw_affiliation_string":"Cheriton School of Computer Science, University of Waterloo, Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100334542","display_name":"Jaeyoung Lee","orcid":"https://orcid.org/0000-0003-4390-7676"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jaeyoung Lee","raw_affiliation_strings":["Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081670169","display_name":"Sean Sedwards","orcid":"https://orcid.org/0000-0002-2903-0823"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Sean Sedwards","raw_affiliation_strings":["Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025977445","display_name":"Vahdat Abdelzad","orcid":null},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Vahdat Abdelzad","raw_affiliation_strings":["Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066916130","display_name":"Krzysztof Czarnecki","orcid":"https://orcid.org/0000-0003-1642-1101"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Krzysztof Czarnecki","raw_affiliation_strings":["Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Waterloo, Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5064704015"],"corresponding_institution_ids":["https://openalex.org/I151746483"],"apc_list":null,"apc_paid":null,"fwci":0.5302,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.73149445,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8051731586456299},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.5646381974220276},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5527973771095276},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.5490394830703735},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5162267684936523},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5016615390777588},{"id":"https://openalex.org/keywords/monte-carlo-tree-search","display_name":"Monte Carlo tree search","score":0.49365100264549255},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.43256285786628723},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.41919976472854614},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.3797186613082886},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3359748125076294},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11426281929016113}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8051731586456299},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5646381974220276},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5527973771095276},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.5490394830703735},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5162267684936523},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5016615390777588},{"id":"https://openalex.org/C46149586","wikidata":"https://www.wikidata.org/wiki/Q11785332","display_name":"Monte Carlo tree search","level":3,"score":0.49365100264549255},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43256285786628723},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.41919976472854614},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.3797186613082886},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3359748125076294},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11426281929016113},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn48605.2020.9207648","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9207648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W1480909796","https://openalex.org/W1515851193","https://openalex.org/W1600437712","https://openalex.org/W1821462560","https://openalex.org/W1999874108","https://openalex.org/W2027857686","https://openalex.org/W2056132907","https://openalex.org/W2091565802","https://openalex.org/W2099302642","https://openalex.org/W2106524649","https://openalex.org/W2120346334","https://openalex.org/W2126292488","https://openalex.org/W2134797427","https://openalex.org/W2144207912","https://openalex.org/W2145339207","https://openalex.org/W2159187228","https://openalex.org/W2294370754","https://openalex.org/W2330820318","https://openalex.org/W2462906003","https://openalex.org/W2554751554","https://openalex.org/W2594633041","https://openalex.org/W2594877703","https://openalex.org/W2618318883","https://openalex.org/W2621053657","https://openalex.org/W2798962412","https://openalex.org/W2803974723","https://openalex.org/W2883535494","https://openalex.org/W2899595183","https://openalex.org/W2911964244","https://openalex.org/W2921558984","https://openalex.org/W2923496202","https://openalex.org/W2951454365","https://openalex.org/W2963784236","https://openalex.org/W2964231903","https://openalex.org/W3085162807","https://openalex.org/W3099006712","https://openalex.org/W4294349862","https://openalex.org/W4294555834","https://openalex.org/W6635902665","https://openalex.org/W6638523607","https://openalex.org/W6677737365","https://openalex.org/W6679909955","https://openalex.org/W6681200002","https://openalex.org/W6683167515","https://openalex.org/W6711870810","https://openalex.org/W6718836005","https://openalex.org/W6729557714","https://openalex.org/W6734194636","https://openalex.org/W6738483526","https://openalex.org/W6739001092","https://openalex.org/W6750697997","https://openalex.org/W6751437432","https://openalex.org/W6753499099","https://openalex.org/W6755542793","https://openalex.org/W6760838277","https://openalex.org/W6761071948","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W2571592646","https://openalex.org/W4247855592","https://openalex.org/W2567165815","https://openalex.org/W2740304877","https://openalex.org/W2112583639","https://openalex.org/W2766259847","https://openalex.org/W2978000411","https://openalex.org/W2601600301","https://openalex.org/W4226164546","https://openalex.org/W346633817"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"are":[3,17,45,87],"capable":[4],"of":[5],"solving":[6],"complex":[7],"control":[8],"tasks":[9],"in":[10,51],"challenging":[11],"environments,":[12],"but":[13,44],"their":[14,30,119],"learned":[15],"policies":[16],"hard":[18],"to":[19,24,40,48,89,118],"interpret.":[20],"Not":[21],"being":[22],"able":[23],"explain":[25,102],"or":[26],"verify":[27],"them":[28],"limits":[29],"practical":[31],"applicability.":[32],"By":[33],"contrast,":[34],"decision":[35],"trees":[36,61],"lend":[37],"themselves":[38],"well":[39],"explanation":[41],"and":[42,62,105],"verification,":[43],"not":[46],"easy":[47],"train,":[49],"especially":[50],"an":[52,64,97],"online":[53,112],"fashion.":[54],"In":[55],"this":[56],"work":[57],"we":[58],"introduce":[59],"Q-BSP":[60,85],"propose":[63],"Ordered":[65],"Sequential":[66],"Monte":[67],"Carlo":[68],"training":[69],"algorithm":[70],"that":[71,94,108],"efficiently":[72],"distills":[73],"the":[74,91],"Q-function":[75],"from":[76],"fully":[77],"trained":[78],"deep":[79],"Q-networks":[80],"into":[81],"a":[82],"tree":[83],"structure.":[84],"forests":[86],"used":[88],"generate":[90],"partitioning":[92],"rules":[93],"transparently":[95],"reconstruct":[96],"accurate":[98],"value":[99],"function.":[100],"We":[101],"our":[103],"approach":[104],"provide":[106],"results":[107],"convincingly":[109],"beat":[110],"earlier":[111],"policy":[113],"distillation":[114],"methods":[115],"with":[116],"respect":[117],"own":[120],"performance":[121],"benchmarks.":[122]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
