{"id":"https://openalex.org/W2974731134","doi":"https://doi.org/10.29007/g7bg","title":"Multi-task Learning and Catastrophic Forgetting in Continual Reinforcement Learning","display_name":"Multi-task Learning and Catastrophic Forgetting in Continual Reinforcement Learning","publication_year":2019,"publication_date":"2019-12-10","ids":{"openalex":"https://openalex.org/W2974731134","doi":"https://doi.org/10.29007/g7bg","mag":"2974731134"},"language":"en","primary_location":{"id":"doi:10.29007/g7bg","is_oa":true,"landing_page_url":"https://doi.org/10.29007/g7bg","pdf_url":"https://easychair.org/publications/open/8RPq","source":{"id":"https://openalex.org/S4220651395","display_name":"EPiC series in computing","issn_l":"2398-7340","issn":["2398-7340"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EPiC Series in Computing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://easychair.org/publications/open/8RPq","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075183110","display_name":"Jo\u00e3o G. Ribeiro","orcid":"https://orcid.org/0000-0002-2484-0114"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Jo\u00e3o Ribeiro","raw_affiliation_strings":["INESC-ID/Instituto Superior Tcnico University of Lisbon Lisbon, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"INESC-ID/Instituto Superior Tcnico University of Lisbon Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101616573","display_name":"Francisco S. Melo","orcid":"https://orcid.org/0000-0001-5705-7372"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Francisco Melo","raw_affiliation_strings":["INESC-ID/Instituto Superior Tcnico University of Lisbon Lisbon, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"INESC-ID/Instituto Superior Tcnico University of Lisbon Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017714052","display_name":"Jo\u00e3o Dias","orcid":"https://orcid.org/0000-0002-1653-1821"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Jo\u00e3o Dias","raw_affiliation_strings":["INESC-ID/Instituto Superior Tcnico University of Lisbon Lisbon, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"INESC-ID/Instituto Superior Tcnico University of Lisbon Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4338,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.72302921,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.88427734375},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7550996541976929},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7020624876022339},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6757075786590576},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5986474752426147},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5344428420066833},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.5044659376144409},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.38267433643341064},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.22399461269378662},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.13494917750358582},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.08143001794815063},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07441762089729309}],"concepts":[{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.88427734375},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7550996541976929},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7020624876022339},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6757075786590576},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5986474752426147},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5344428420066833},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.5044659376144409},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38267433643341064},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.22399461269378662},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.13494917750358582},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.08143001794815063},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07441762089729309},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.29007/g7bg","is_oa":true,"landing_page_url":"https://doi.org/10.29007/g7bg","pdf_url":"https://easychair.org/publications/open/8RPq","source":{"id":"https://openalex.org/S4220651395","display_name":"EPiC series in computing","issn_l":"2398-7340","issn":["2398-7340"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EPiC Series in Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1909.10008","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.10008","pdf_url":"https://arxiv.org/pdf/1909.10008","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2974731134","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/1909.10008","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1909.10008","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1909.10008","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.29007/g7bg","is_oa":true,"landing_page_url":"https://doi.org/10.29007/g7bg","pdf_url":"https://easychair.org/publications/open/8RPq","source":{"id":"https://openalex.org/S4220651395","display_name":"EPiC series in computing","issn_l":"2398-7340","issn":["2398-7340"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EPiC Series in Computing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4399999976158142}],"awards":[{"id":"https://openalex.org/G1523888516","display_name":null,"funder_award_id":"FA9550-","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G1801779633","display_name":null,"funder_award_id":"FA9550-19-1-0020","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G5809100787","display_name":null,"funder_award_id":"FA9550","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G6700943011","display_name":null,"funder_award_id":"UID/CEC/50021/2019","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"}],"funders":[{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2974731134.pdf","grobid_xml":"https://content.openalex.org/works/W2974731134.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W1757796397","https://openalex.org/W2015861736","https://openalex.org/W2123979492","https://openalex.org/W2124175081","https://openalex.org/W2125074935","https://openalex.org/W2131600418","https://openalex.org/W2145339207","https://openalex.org/W2161381512","https://openalex.org/W2169673611","https://openalex.org/W2174786457","https://openalex.org/W2257979135","https://openalex.org/W2260756217","https://openalex.org/W2402144811","https://openalex.org/W2426267443","https://openalex.org/W2473930607","https://openalex.org/W2523246573","https://openalex.org/W2560647685","https://openalex.org/W2584377191","https://openalex.org/W2736601468","https://openalex.org/W2743151379","https://openalex.org/W2774373350","https://openalex.org/W2788388592","https://openalex.org/W2798705390","https://openalex.org/W2914746235","https://openalex.org/W2950004691","https://openalex.org/W2950395671","https://openalex.org/W2952509347","https://openalex.org/W2963850662","https://openalex.org/W2964043796","https://openalex.org/W3103780890","https://openalex.org/W6713134421","https://openalex.org/W6732837357","https://openalex.org/W6849896277"],"related_works":["https://openalex.org/W2998496267","https://openalex.org/W2804033308","https://openalex.org/W2952448454","https://openalex.org/W3103605161","https://openalex.org/W3118694267","https://openalex.org/W3208761439","https://openalex.org/W2398490066","https://openalex.org/W3209208698","https://openalex.org/W3173590384","https://openalex.org/W2004030284","https://openalex.org/W2953713676","https://openalex.org/W3128466077","https://openalex.org/W2945530748","https://openalex.org/W3167613675","https://openalex.org/W2809425026","https://openalex.org/W2528846071","https://openalex.org/W2999391737","https://openalex.org/W3148342934","https://openalex.org/W3041557432","https://openalex.org/W3164371539"],"abstract_inverted_index":{"In":[0],"this":[1],"paper":[2],"we":[3],"investigate":[4],"two":[5,34,41,81,116,142,165,206],"hypothesis":[6,19,65],"regarding":[7],"the":[8,23,57,69,73,98,115,171,190,205],"use":[9],"of":[10,25,56,71,196,201],"deep":[11,28,76],"reinforcement":[12,29],"learning":[13,30,49],"in":[14,114,137],"multiple":[15],"tasks.":[16,118,208],"The":[17,63],"first":[18],"is":[20,37,66,91,136,176,181],"driven":[21,67],"by":[22,46,68],"question":[24,70],"whether":[26,72],"a":[27,50,102,122,154,198],"algorithm,":[31,78,128],"trained":[32,44,79,129,146,166],"on":[33,80,97,130,153,170,189,204],"similar":[35,52,82,95,103,187],"tasks,":[36],"able":[38,92,109,139,184],"to":[39,93,110,140,185],"outperform":[40,141],"single-task,":[42,150],"individually":[43,147],"algorithms,":[45],"more":[47],"efficiently":[48],"new,":[51,155],"task,":[53,100,173,192],"that":[54,121],"none":[55],"three":[58],"algorithms":[59,169],"has":[60],"encountered":[61],"before.":[62],"second":[64],"same":[74],"multi-task":[75,123,167],"RL":[77],"tasks":[83],"and":[84,133],"augmented":[85,177],"with":[86,178],"elastic":[87],"weight":[88],"consolidation":[89],"(EWC),":[90],"retain":[94],"performance":[96,188],"new":[99,191],"as":[101],"algorithm":[104],"without":[105],"EWC,":[106,179],"whilst":[107],"being":[108],"overcome":[111],"catastrophic":[112,202],"forgetting":[113,203],"previous":[117,207],"We":[119,159],"show":[120,161],"Asynchronous":[124],"Advantage":[125],"Actor-Critic":[126],"(GA3C)":[127],"Space":[131],"Invaders":[132],"Demon":[134],"Attack,":[135],"fact":[138],"single-tasks":[143],"GA3C":[144,168],"versions,":[145],"for":[148],"each":[149],"when":[151,163],"evaluated":[152],"third":[156,172],"task\u2014namely,":[157],"Phoenix.":[158],"also":[160,194],"that,":[162],"training":[164],"if":[174],"one":[175],"it":[180],"not":[182],"only":[183],"achieve":[186],"but":[193],"capable":[195],"overcoming":[197],"substantial":[199],"amount":[200]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
