{"id":"https://openalex.org/W4380520279","doi":"https://doi.org/10.1109/tnnls.2023.3281604","title":"Efficient Bayesian Policy Reuse With a Scalable Observation Model in Deep Reinforcement Learning","display_name":"Efficient Bayesian Policy Reuse With a Scalable Observation Model in Deep Reinforcement Learning","publication_year":2023,"publication_date":"2023-06-13","ids":{"openalex":"https://openalex.org/W4380520279","doi":"https://doi.org/10.1109/tnnls.2023.3281604","pmid":"https://pubmed.ncbi.nlm.nih.gov/37310820"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2023.3281604","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3281604","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101769759","display_name":"Jinmei Liu","orcid":"https://orcid.org/0000-0002-6496-0719"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jinmei Liu","raw_affiliation_strings":["Department of Control Science and Intelligent Engineering, School of Management and Engineering, and the Research Center for Novel Technology of Intelligent Equipment, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Intelligent Engineering, School of Management and Engineering, and the Research Center for Novel Technology of Intelligent Equipment, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100376415","display_name":"Zhi Wang","orcid":"https://orcid.org/0000-0003-0304-3965"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi Wang","raw_affiliation_strings":["Department of Control Science and Intelligent Engineering, School of Management and Engineering, and the Research Center for Novel Technology of Intelligent Equipment, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Intelligent Engineering, School of Management and Engineering, and the Research Center for Novel Technology of Intelligent Equipment, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100697167","display_name":"Chunlin Chen","orcid":"https://orcid.org/0000-0003-3929-4707"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunlin Chen","raw_affiliation_strings":["Department of Control Science and Intelligent Engineering, School of Management and Engineering, and the Research Center for Novel Technology of Intelligent Equipment, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Intelligent Engineering, School of Management and Engineering, and the Research Center for Novel Technology of Intelligent Equipment, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000582423","display_name":"Daoyi Dong","orcid":"https://orcid.org/0000-0002-7425-3559"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]},{"id":"https://openalex.org/I188329596","display_name":"University of Canberra","ror":"https://ror.org/04s1nv328","country_code":"AU","type":"education","lineage":["https://openalex.org/I188329596"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Daoyi Dong","raw_affiliation_strings":["School of Engineering and Information Technology, University of New South Wales, Canberra, ACT, Australia"],"affiliations":[{"raw_affiliation_string":"School of Engineering and Information Technology, University of New South Wales, Canberra, ACT, Australia","institution_ids":["https://openalex.org/I188329596","https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101769759"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":0.6993,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.74715262,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"35","issue":"10","first_page":"14797","last_page":"14809"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7821017503738403},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7529416084289551},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6913480162620544},{"id":"https://openalex.org/keywords/business-process-reengineering","display_name":"Business process reengineering","score":0.6440392732620239},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6109971404075623},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.5925410985946655},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5730230808258057},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5505443811416626},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.5452558994293213},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.5424104928970337},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5396132469177246},{"id":"https://openalex.org/keywords/bayesian-inference","display_name":"Bayesian inference","score":0.5273160338401794},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5097793936729431},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.47693341970443726},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4336439371109009},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3285999298095703},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.28978431224823},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09745621681213379},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.08419176936149597}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7821017503738403},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7529416084289551},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6913480162620544},{"id":"https://openalex.org/C29143872","wikidata":"https://www.wikidata.org/wiki/Q876690","display_name":"Business process reengineering","level":3,"score":0.6440392732620239},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6109971404075623},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.5925410985946655},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5730230808258057},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5505443811416626},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.5452558994293213},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.5424104928970337},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5396132469177246},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.5273160338401794},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5097793936729431},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.47693341970443726},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4336439371109009},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3285999298095703},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28978431224823},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09745621681213379},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.08419176936149597},{"id":"https://openalex.org/C548081761","wikidata":"https://www.wikidata.org/wiki/Q180388","display_name":"Waste management","level":1,"score":0.0},{"id":"https://openalex.org/C137335462","wikidata":"https://www.wikidata.org/wiki/Q380772","display_name":"Lean manufacturing","level":2,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2023.3281604","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3281604","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:37310820","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37310820","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4399999976158142}],"awards":[{"id":"https://openalex.org/G3378024488","display_name":null,"funder_award_id":"FT220100656","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"},{"id":"https://openalex.org/G4904587412","display_name":null,"funder_award_id":"62006111","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6081884918","display_name":null,"funder_award_id":"BK20200330","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"},{"id":"https://openalex.org/G7996831764","display_name":null,"funder_award_id":"62073160","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W778742492","https://openalex.org/W1780905749","https://openalex.org/W1994005439","https://openalex.org/W2004030284","https://openalex.org/W2056584142","https://openalex.org/W2072492557","https://openalex.org/W2097381042","https://openalex.org/W2100494227","https://openalex.org/W2119717200","https://openalex.org/W2129564505","https://openalex.org/W2158782408","https://openalex.org/W2165698076","https://openalex.org/W2605369401","https://openalex.org/W2754517384","https://openalex.org/W2760355785","https://openalex.org/W2766447205","https://openalex.org/W2792645523","https://openalex.org/W2911087563","https://openalex.org/W2912681837","https://openalex.org/W2952465248","https://openalex.org/W2965407115","https://openalex.org/W2965433979","https://openalex.org/W2966477753","https://openalex.org/W2982316857","https://openalex.org/W2999912861","https://openalex.org/W3034815680","https://openalex.org/W3046093665","https://openalex.org/W3091225957","https://openalex.org/W3096650854","https://openalex.org/W3124280310","https://openalex.org/W3130292943","https://openalex.org/W3153119964","https://openalex.org/W3175558129","https://openalex.org/W3187550742","https://openalex.org/W3197344417","https://openalex.org/W3205226031","https://openalex.org/W4214717370","https://openalex.org/W4226257065","https://openalex.org/W6638503097","https://openalex.org/W6674600207","https://openalex.org/W6676875888","https://openalex.org/W6680657880","https://openalex.org/W6685726866","https://openalex.org/W6698463750","https://openalex.org/W6704571135","https://openalex.org/W6718190810","https://openalex.org/W6719360576","https://openalex.org/W6736469832","https://openalex.org/W6738483526","https://openalex.org/W6748839928","https://openalex.org/W6751794878","https://openalex.org/W6754407519","https://openalex.org/W6769729221","https://openalex.org/W6780559895","https://openalex.org/W6782022165","https://openalex.org/W6784646362","https://openalex.org/W6796505199","https://openalex.org/W6991122698","https://openalex.org/W7066478640"],"related_works":["https://openalex.org/W2109135184","https://openalex.org/W1556822568","https://openalex.org/W2139814012","https://openalex.org/W2407816298","https://openalex.org/W2361848772","https://openalex.org/W2136144897","https://openalex.org/W1969388206","https://openalex.org/W2083287972","https://openalex.org/W2602853592","https://openalex.org/W2523375588"],"abstract_inverted_index":{"Bayesian":[0],"policy":[1,7,14,47,220],"reuse":[2],"(BPR)":[3],"is":[4,88],"a":[5,12,30,144,159,193],"general":[6],"transfer":[8,48,200],"framework":[9],"for":[10,96],"selecting":[11],"source":[13,155],"from":[15,157],"an":[16,39,78],"offline":[17],"library":[18],"by":[19,186],"inferring":[20],"the":[21,59,63,75,83,93,112,116,134,139,172,178,182,188],"task":[22,101],"belief":[23],"based":[24,148],"on":[25,149],"some":[26],"observation":[27,32,64,94,118,146,190],"signals":[28,169],"and":[29,70,90,98,124,129,217],"trained":[31],"model.":[33],"In":[34],"this":[35],"article,":[36],"we":[37,81,142,176],"propose":[38,143],"improved":[40],"BPR":[41,56,104,180],"method":[42,212],"to":[43,110,127,167,181],"achieve":[44],"more":[45,99,218],"efficient":[46,219],"in":[49,171,192],"deep":[50],"reinforcement":[51],"learning":[52,184],"(DRL).":[53],"First,":[54],"most":[55],"algorithms":[57,105],"use":[58],"episodic":[60],"return":[61],"as":[62,92,138],"signal":[65,95],"that":[66,210],"contains":[67],"limited":[68],"information":[69],"cannot":[71],"be":[72,122],"obtained":[73],"until":[74],"end":[76],"of":[77,115,154,162],"episode.":[79],"Instead,":[80],"employ":[82],"state":[84,135,151],"transition":[85,136,152],"sample,":[86],"which":[87,120,164,196],"informative":[89],"instantaneous,":[91],"faster":[97,216],"accurate":[100],"inference.":[102],"Second,":[103],"usually":[106],"require":[107],"numerous":[108],"samples":[109],"estimate":[111],"probability":[113],"distribution":[114],"tabular-based":[117],"model,":[119],"may":[121],"expensive":[123],"even":[125],"infeasible":[126],"learn":[128],"maintain,":[130],"especially":[131],"when":[132,201],"using":[133],"sample":[137],"signal.":[140],"Hence,":[141],"scalable":[145,189],"model":[147,191],"fitting":[150],"functions":[153],"tasks":[156],"only":[158],"small":[160],"number":[161],"samples,":[163],"can":[165,197,213],"generalize":[166],"any":[168],"observed":[170],"target":[173],"task.":[174],"Moreover,":[175],"extend":[177],"offline-mode":[179],"continual":[183],"setting":[185],"expanding":[187],"plug-and-play":[194],"fashion,":[195],"avoid":[198],"negative":[199],"faced":[202],"with":[203],"new":[204],"unknown":[205],"tasks.":[206],"Experimental":[207],"results":[208],"show":[209],"our":[211],"consistently":[214],"facilitate":[215],"transfer.":[221]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
