{"id":"https://openalex.org/W2997756730","doi":"https://doi.org/10.1109/tcds.2021.3050723","title":"Augmented Memory Replay in Reinforcement Learning With Continuous Control","display_name":"Augmented Memory Replay in Reinforcement Learning With Continuous Control","publication_year":2021,"publication_date":"2021-01-13","ids":{"openalex":"https://openalex.org/W2997756730","doi":"https://doi.org/10.1109/tcds.2021.3050723","mag":"2997756730"},"language":"en","primary_location":{"id":"doi:10.1109/tcds.2021.3050723","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcds.2021.3050723","pdf_url":null,"source":{"id":"https://openalex.org/S2488537894","display_name":"IEEE Transactions on Cognitive and Developmental Systems","issn_l":"2379-8920","issn":["2379-8920","2379-8939"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cognitive and Developmental Systems","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1912.12719","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Mirza Ramicic","orcid":"https://orcid.org/0000-0002-6302-4515"},"institutions":[{"id":"https://openalex.org/I44504214","display_name":"Czech Technical University in Prague","ror":"https://ror.org/03kqpb082","country_code":"CZ","type":"education","lineage":["https://openalex.org/I44504214"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"Mirza Ramicic","raw_affiliation_strings":["Artificial Intelligence Center, Faculty of Electrical Engineering, Czech Technical University in Prague, Prague, Czech Republic"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Center, Faculty of Electrical Engineering, Czech Technical University in Prague, Prague, Czech Republic","institution_ids":["https://openalex.org/I44504214"]}]},{"author_position":"last","author":{"id":null,"display_name":"Andrea Bonarini","orcid":"https://orcid.org/0000-0002-4880-4521"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Andrea Bonarini","raw_affiliation_strings":["Dipartimento di Elettronica, Informazione e Bioingegneria, Artificial Intelligence and Robotics Lab, Politecnico di Milano, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Dipartimento di Elettronica, Informazione e Bioingegneria, Artificial Intelligence and Robotics Lab, Politecnico di Milano, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I44504214"],"apc_list":null,"apc_paid":null,"fwci":0.5599,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.71477954,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"14","issue":"2","first_page":"485","last_page":"496"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7192999720573425,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7192999720573425,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.034699998795986176,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.03060000017285347,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.8549000024795532},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8526999950408936},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.45669999718666077},{"id":"https://openalex.org/keywords/memory-consolidation","display_name":"Memory consolidation","score":0.4366999864578247},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4081999957561493},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.3952000141143799},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.37290000915527344},{"id":"https://openalex.org/keywords/learning-classifier-system","display_name":"Learning classifier system","score":0.3711000084877014}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8712999820709229},{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.8549000024795532},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8526999950408936},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.45669999718666077},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45660001039505005},{"id":"https://openalex.org/C48455012","wikidata":"https://www.wikidata.org/wiki/Q2892593","display_name":"Memory consolidation","level":3,"score":0.4366999864578247},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4081999957561493},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.3952000141143799},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.37290000915527344},{"id":"https://openalex.org/C199190896","wikidata":"https://www.wikidata.org/wiki/Q3509276","display_name":"Learning classifier system","level":3,"score":0.3711000084877014},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3634999990463257},{"id":"https://openalex.org/C53442348","wikidata":"https://www.wikidata.org/wiki/Q745101","display_name":"Content-addressable memory","level":3,"score":0.3416999876499176},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3303000032901764},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.3091999888420105},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3025999963283539},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.28119999170303345},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.2761000096797943},{"id":"https://openalex.org/C30390489","wikidata":"https://www.wikidata.org/wiki/Q4680748","display_name":"Adaptive memory","level":3,"score":0.2734000086784363},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.2574000060558319}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tcds.2021.3050723","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcds.2021.3050723","pdf_url":null,"source":{"id":"https://openalex.org/S2488537894","display_name":"IEEE Transactions on Cognitive and Developmental Systems","issn_l":"2379-8920","issn":["2379-8920","2379-8939"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cognitive and Developmental Systems","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1912.12719","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1912.12719","pdf_url":"https://arxiv.org/pdf/1912.12719","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:re.public.polimi.it:11311/1208445","is_oa":true,"landing_page_url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9319729","pdf_url":null,"source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1912.12719","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1912.12719","pdf_url":"https://arxiv.org/pdf/1912.12719","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1016472498","https://openalex.org/W1935925708","https://openalex.org/W1967286340","https://openalex.org/W1980035368","https://openalex.org/W2001257668","https://openalex.org/W2015177294","https://openalex.org/W2023366928","https://openalex.org/W2033016465","https://openalex.org/W2047057213","https://openalex.org/W2060277733","https://openalex.org/W2076327464","https://openalex.org/W2086162146","https://openalex.org/W2087617385","https://openalex.org/W2091565802","https://openalex.org/W2100377190","https://openalex.org/W2106304233","https://openalex.org/W2120629471","https://openalex.org/W2134460774","https://openalex.org/W2135395238","https://openalex.org/W2145339207","https://openalex.org/W2164424353","https://openalex.org/W2164790343","https://openalex.org/W2165303994","https://openalex.org/W2188721763","https://openalex.org/W2294316001","https://openalex.org/W2424347275","https://openalex.org/W2469090167","https://openalex.org/W2738975853","https://openalex.org/W2739657930","https://openalex.org/W2767442953","https://openalex.org/W2802954485","https://openalex.org/W2912460619","https://openalex.org/W2952504784","https://openalex.org/W2969743374","https://openalex.org/W2981707302","https://openalex.org/W2984408429","https://openalex.org/W3008569983","https://openalex.org/W3014420315","https://openalex.org/W4230890479","https://openalex.org/W4255229883","https://openalex.org/W6635465357","https://openalex.org/W6692846177","https://openalex.org/W6748317118","https://openalex.org/W7028733013"],"related_works":[],"abstract_inverted_index":{"Online":[0],"reinforcement":[1],"learning":[2,54,90,218,262],"agents":[3,174],"are":[4,78,133],"currently":[5],"able":[6,161,251],"to":[7,39,42,162,181,202,252],"process":[8],"an":[9,164,234],"increasing":[10,243],"amount":[11,67],"of":[12,25,51,64,68,73,107,121,148,172,191,216,242,246,260,268],"data":[13],"by":[14,53,125,135,176,225],"converting":[15],"it":[16,38],"into":[17],"a":[18,65,70,98,104,155,217,221],"higher":[19,108],"order":[20,201],"value":[21],"functions.":[22],"This":[23],"expansion":[24],"the":[26,30,33,49,62,74,81,119,128,136,173,185,204,213,244,247,255,261,266],"information":[27],"collected":[28],"from":[29,97],"environment":[31,223],"increases":[32,48],"agent\u2019s":[34],"state":[35],"space":[36],"enabling":[37],"scale":[40],"up":[41],"more":[43],"complex":[44],"problems":[45],"but":[46],"also":[47],"risk":[50],"forgetting":[52,122],"on":[55,154],"redundant":[56],"or":[57,141],"conflicting":[58],"data.":[59],"To":[60],"improve":[61],"approximation":[63],"large":[66],"data,":[69],"random":[71],"mini-batch":[72],"past":[75],"experiences":[76,175],"that":[77,211,233],"stored":[79],"in":[80,112,184,200,220],"replay":[82,140,186],"memory":[83,116,139],"buffer":[84],"is":[85,160,195,250],"often":[86],"replayed":[87],"at":[88],"each":[89,171],"step.":[91],"The":[92,146,188],"proposed":[93,137],"work":[94],"takes":[95],"inspiration":[96],"biological":[99],"mechanism":[100],"which":[101,169],"acts":[102],"as":[103],"protective":[105],"layer":[106],"cognitive":[109],"functions":[110],"found":[111],"mammalian":[113],"brain:":[114],"active":[115],"consolidation":[117],"mitigates":[118],"effect":[120],"previous":[123],"memories":[124,249],"dynamically":[126],"processing":[127],"new":[129],"ones.":[130],"Similar":[131],"dynamics":[132],"implemented":[134],"augmented":[138],"<italic":[142,149,165,192,206,236],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[143,150,166,193,207,237],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">AMR</i>":[144,151,194,238],"algorithm.":[145],"architecture":[147],",":[152],"based":[153],"simple":[156],"artificial":[157],"neural":[158],"network":[159],"provide":[163],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">augmentation":[167,208],"policy</i>":[168,209],"modifies":[170],"augmenting":[177],"their":[178],"relevance":[179],"prior":[180],"storing":[182],"them":[183],"memory.":[187],"function":[189,210,240],"approximator":[190],"evolved":[196,235],"using":[197],"genetic":[198],"algorithm":[199],"obtain":[203],"specific":[205,222,248],"yields":[212],"best":[214],"performance":[215],"agent":[219],"given":[224],"its":[226],"received":[227],"cumulative":[228],"reward.":[229],"Experimental":[230],"results":[231],"show":[232],"augmentation":[239],"capable":[241],"significance":[245],"further":[253],"increase":[254],"stability":[256],"and":[257],"convergence":[258],"speed":[259],"algorithms":[263],"dealing":[264],"with":[265],"complexity":[267],"continuous":[269],"action":[270],"domains.":[271]},"counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-01-10T00:00:00"}
