{"id":"https://openalex.org/W4280508537","doi":"https://doi.org/10.1109/tcyb.2022.3170485","title":"A Dirichlet Process Mixture of Robust Task Models for Scalable Lifelong Reinforcement Learning","display_name":"A Dirichlet Process Mixture of Robust Task Models for Scalable Lifelong Reinforcement Learning","publication_year":2022,"publication_date":"2022-05-17","ids":{"openalex":"https://openalex.org/W4280508537","doi":"https://doi.org/10.1109/tcyb.2022.3170485","pmid":"https://pubmed.ncbi.nlm.nih.gov/35580095"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2022.3170485","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2022.3170485","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2205.10787","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100376415","display_name":"Zhi Wang","orcid":"https://orcid.org/0000-0003-0304-3965"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhi Wang","raw_affiliation_strings":["School of Management and Engineering, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Management and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100697167","display_name":"Chunlin Chen","orcid":"https://orcid.org/0000-0003-3929-4707"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunlin Chen","raw_affiliation_strings":["School of Management and Engineering, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Management and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000582423","display_name":"Daoyi Dong","orcid":"https://orcid.org/0000-0002-7425-3559"},"institutions":[{"id":"https://openalex.org/I188329596","display_name":"University of Canberra","ror":"https://ror.org/04s1nv328","country_code":"AU","type":"education","lineage":["https://openalex.org/I188329596"]},{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Daoyi Dong","raw_affiliation_strings":["School of Engineering and Information Technology, University of New South Wales, Canberra, Australia"],"affiliations":[{"raw_affiliation_string":"School of Engineering and Information Technology, University of New South Wales, Canberra, Australia","institution_ids":["https://openalex.org/I188329596","https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100376415"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":2.6214,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.91003825,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"53","issue":"12","first_page":"7509","last_page":"7520"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8083153963088989},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6965480446815491},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.5744203925132751},{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.5501202344894409},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5306823253631592},{"id":"https://openalex.org/keywords/dirichlet-process","display_name":"Dirichlet process","score":0.5300533175468445},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.5245991945266724},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5206767320632935},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5182545185089111},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.48139217495918274},{"id":"https://openalex.org/keywords/lifelong-learning","display_name":"Lifelong learning","score":0.46862226724624634},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4248954653739929},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.23484182357788086}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8083153963088989},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6965480446815491},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.5744203925132751},{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.5501202344894409},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5306823253631592},{"id":"https://openalex.org/C2781280628","wikidata":"https://www.wikidata.org/wiki/Q5280766","display_name":"Dirichlet process","level":3,"score":0.5300533175468445},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.5245991945266724},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5206767320632935},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5182545185089111},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.48139217495918274},{"id":"https://openalex.org/C108771440","wikidata":"https://www.wikidata.org/wiki/Q368475","display_name":"Lifelong learning","level":2,"score":0.46862226724624634},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4248954653739929},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.23484182357788086},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tcyb.2022.3170485","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2022.3170485","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:35580095","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35580095","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null},{"id":"pmh:oai:arXiv.org:2205.10787","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.10787","pdf_url":"https://arxiv.org/pdf/2205.10787","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2205.10787","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.10787","pdf_url":"https://arxiv.org/pdf/2205.10787","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.41999998688697815}],"awards":[{"id":"https://openalex.org/G3174144325","display_name":null,"funder_award_id":"DP190101566","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"},{"id":"https://openalex.org/G4904587412","display_name":null,"funder_award_id":"62006111","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6081884918","display_name":null,"funder_award_id":"BK20200330","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"},{"id":"https://openalex.org/G7996831764","display_name":null,"funder_award_id":"62073160","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1959608418","https://openalex.org/W2020573190","https://openalex.org/W2031727428","https://openalex.org/W2056584142","https://openalex.org/W2103753221","https://openalex.org/W2116522068","https://openalex.org/W2131479143","https://openalex.org/W2133458291","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2161381512","https://openalex.org/W2529605558","https://openalex.org/W2560647685","https://openalex.org/W2604737628","https://openalex.org/W2604763608","https://openalex.org/W2605102758","https://openalex.org/W2754791538","https://openalex.org/W2757047188","https://openalex.org/W2788388592","https://openalex.org/W2792645523","https://openalex.org/W2803973384","https://openalex.org/W2804175194","https://openalex.org/W2891076394","https://openalex.org/W2895958971","https://openalex.org/W2902456977","https://openalex.org/W2904106049","https://openalex.org/W2905334533","https://openalex.org/W2912681837","https://openalex.org/W2921236134","https://openalex.org/W2933570795","https://openalex.org/W2947461406","https://openalex.org/W2951400270","https://openalex.org/W2952629144","https://openalex.org/W2960705509","https://openalex.org/W2962974944","https://openalex.org/W2963540014","https://openalex.org/W2963559848","https://openalex.org/W2963864421","https://openalex.org/W2964048876","https://openalex.org/W2982316857","https://openalex.org/W2996472503","https://openalex.org/W2999912861","https://openalex.org/W3014664339","https://openalex.org/W3034815680","https://openalex.org/W3046093665","https://openalex.org/W4214717370","https://openalex.org/W4237591687","https://openalex.org/W4289117725","https://openalex.org/W4294562888","https://openalex.org/W4295883599","https://openalex.org/W4299879270","https://openalex.org/W4301163820","https://openalex.org/W4319988532","https://openalex.org/W6640963894","https://openalex.org/W6679468046","https://openalex.org/W6679518283","https://openalex.org/W6684578138","https://openalex.org/W6684921986","https://openalex.org/W6728925229","https://openalex.org/W6736057607","https://openalex.org/W6738602802","https://openalex.org/W6741087337","https://openalex.org/W6741217325","https://openalex.org/W6742852309","https://openalex.org/W6744707562","https://openalex.org/W6748819345","https://openalex.org/W6751591294","https://openalex.org/W6755353450","https://openalex.org/W6756463683","https://openalex.org/W6756754374","https://openalex.org/W6760539816","https://openalex.org/W6763462227","https://openalex.org/W6764998394","https://openalex.org/W6771640813","https://openalex.org/W6849896277"],"related_works":["https://openalex.org/W4289718052","https://openalex.org/W2164121020","https://openalex.org/W3204184292","https://openalex.org/W2145559838","https://openalex.org/W2905319430","https://openalex.org/W3116498279","https://openalex.org/W4287549553","https://openalex.org/W3176564347","https://openalex.org/W2355833770","https://openalex.org/W4310285384"],"abstract_inverted_index":{"While":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"algorithms":[4],"are":[5,114],"achieving":[6],"state-of-the-art":[7],"performance":[8],"in":[9,82,158],"various":[10],"challenging":[11],"tasks,":[12],"they":[13],"can":[14,165],"easily":[15],"encounter":[16],"catastrophic":[17],"forgetting":[18],"or":[19,137],"interference":[20],"when":[21],"faced":[22],"with":[23,121],"lifelong":[24,34,191],"streaming":[25],"information.":[26],"In":[27],"this":[28],"article,":[29],"we":[30,140,183],"propose":[31],"a":[32,56,83,95],"scalable":[33,190],"RL":[35,192],"method":[36,187],"that":[37,100,185],"dynamically":[38,128],"expands":[39],"the":[40,62,72,79,88,92,112,117,130,142,152,159,162],"network":[41],"capacity":[42],"to":[43,60,146,170],"accommodate":[44],"new":[45,102],"knowledge":[46],"while":[47],"preventing":[48],"past":[49],"memories":[50],"from":[51],"being":[52],"perturbed.":[53],"We":[54,86],"use":[55,141],"Dirichlet":[57],"process":[58,98],"mixture":[59,93,103,113],"model":[61,131,157,164],"nonstationary":[63],"task":[64,68,80,135,156],"distribution,":[65],"which":[66,127],"captures":[67],"relatedness":[69],"by":[70,116],"estimating":[71],"likelihood":[73],"of":[74,91,111,154],"task-to-cluster":[75],"assignments":[76],"and":[77,109,168,180,193],"clusters":[78],"models":[81],"latent":[84],"space.":[85],"formulate":[87],"prior":[89,149],"distribution":[90],"as":[94,105],"Chinese":[96],"restaurant":[97],"(CRP)":[99],"instantiates":[101],"components":[104],"needed.":[106],"The":[107],"update":[108],"expansion":[110],"governed":[115],"Bayesian":[118],"nonparametric":[119],"framework":[120],"an":[122],"expectation":[123],"maximization":[124],"(EM)":[125],"procedure,":[126],"adapts":[129],"complexity":[132],"without":[133],"explicit":[134],"boundaries":[136],"heuristics.":[138],"Moreover,":[139],"domain":[143],"randomization":[144],"technique":[145],"train":[147],"robust":[148],"parameters":[150],"for":[151],"initialization":[153],"each":[155],"mixture;":[160],"thus,":[161],"resulting":[163],"better":[166],"generalize":[167],"adapt":[169],"unseen":[171],"tasks.":[172],"With":[173],"extensive":[174],"experiments":[175],"conducted":[176],"on":[177],"robot":[178],"navigation":[179],"locomotion":[181],"domains,":[182],"show":[184],"our":[186],"successfully":[188],"facilitates":[189],"outperforms":[194],"relevant":[195],"existing":[196],"methods.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
