{"id":"https://openalex.org/W2963454359","doi":"https://doi.org/10.1109/icra.2016.7487174","title":"Learning deep neural network policies with continuous memory states","display_name":"Learning deep neural network policies with continuous memory states","publication_year":2016,"publication_date":"2016-05-01","ids":{"openalex":"https://openalex.org/W2963454359","doi":"https://doi.org/10.1109/icra.2016.7487174","mag":"2963454359"},"language":"en","primary_location":{"id":"doi:10.1109/icra.2016.7487174","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2016.7487174","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056012604","display_name":"Marvin Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Marvin Zhang","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California Berkeley, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008996507","display_name":"Zoe McCarthy","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zoe McCarthy","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California Berkeley, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005431772","display_name":"Chelsea Finn","orcid":"https://orcid.org/0000-0001-6298-0874"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chelsea Finn","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California Berkeley, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sergey Levine","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California Berkeley, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049349154","display_name":"Pieter Abbeel","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pieter Abbeel","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California Berkeley, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5056012604"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":17.3037,"has_fulltext":false,"cited_by_count":90,"citation_normalized_percentile":{"value":0.99128002,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"520","last_page":"527"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/memorization","display_name":"Memorization","score":0.8798506259918213},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7638055086135864},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6146043539047241},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.57365483045578},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.5143783688545227},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4730070233345032},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.46697497367858887},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.44887661933898926},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4202110767364502},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.41718360781669617},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4171574115753174},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.12250599265098572},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.0922653079032898}],"concepts":[{"id":"https://openalex.org/C30038468","wikidata":"https://www.wikidata.org/wiki/Q4354775","display_name":"Memorization","level":2,"score":0.8798506259918213},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7638055086135864},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6146043539047241},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.57365483045578},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.5143783688545227},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4730070233345032},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.46697497367858887},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.44887661933898926},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4202110767364502},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.41718360781669617},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4171574115753174},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.12250599265098572},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0922653079032898},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra.2016.7487174","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2016.7487174","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.8199999928474426,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W834081922","https://openalex.org/W1499669280","https://openalex.org/W1533535072","https://openalex.org/W1564755532","https://openalex.org/W1599010327","https://openalex.org/W1607896360","https://openalex.org/W1640774615","https://openalex.org/W1657542410","https://openalex.org/W1662842982","https://openalex.org/W1815076433","https://openalex.org/W1931877416","https://openalex.org/W2012587148","https://openalex.org/W2055921164","https://openalex.org/W2064675550","https://openalex.org/W2121103318","https://openalex.org/W2125612430","https://openalex.org/W2126909264","https://openalex.org/W2130942839","https://openalex.org/W2140135625","https://openalex.org/W2144913588","https://openalex.org/W2147032798","https://openalex.org/W2155007355","https://openalex.org/W2157331557","https://openalex.org/W2161872510","https://openalex.org/W2949340759","https://openalex.org/W2962957031","https://openalex.org/W2963430173","https://openalex.org/W2964161785","https://openalex.org/W4205513846","https://openalex.org/W6623316541","https://openalex.org/W6631863446","https://openalex.org/W6633472939","https://openalex.org/W6635795003","https://openalex.org/W6636331330","https://openalex.org/W6636894280","https://openalex.org/W6637000695","https://openalex.org/W6638545294","https://openalex.org/W6640174482","https://openalex.org/W6678367057","https://openalex.org/W6679436768","https://openalex.org/W6680657880","https://openalex.org/W6681631837","https://openalex.org/W6682849425"],"related_works":["https://openalex.org/W4387561393","https://openalex.org/W3163481960","https://openalex.org/W3093895509","https://openalex.org/W4283526844","https://openalex.org/W280704926","https://openalex.org/W2476068070","https://openalex.org/W4323971310","https://openalex.org/W2893372175","https://openalex.org/W2323394100","https://openalex.org/W2012104909"],"abstract_inverted_index":{"Policy":[0],"learning":[1,25,110,160],"for":[2,24,30,146],"partially":[3],"observed":[4],"control":[5,182],"tasks":[6,179,203],"requires":[7],"policies":[8,26,66,122,196],"that":[9,55,141,190,197,204],"can":[10,58,120,193],"remember":[11],"salient":[12,85],"information":[13,86],"from":[14,60],"past":[15],"observations.":[16],"In":[17],"this":[18,68,98],"paper,":[19],"we":[20,119],"present":[21],"a":[22,103,108,113,200],"method":[23,114,177,192],"with":[27,51,67,123],"internal":[28],"memory":[29,53,71,139,172],"high-dimensional,":[31],"continuous":[32,181],"systems,":[33],"such":[34],"as":[35],"robotic":[36],"manipulators.":[37],"Our":[38],"approach":[39],"consists":[40],"of":[41,48,70,137,202],"augmenting":[42],"the":[43,49,56,77,83,130,135,138,147,151,158,163],"state":[44],"and":[45,61,107,126,185,188],"action":[46,153],"space":[47],"system":[50],"continuous-valued":[52],"states":[54,140],"policy":[57,78,99,117,148,164],"read":[59],"write":[62],"to.":[63],"Learning":[64],"general-purpose":[65],"type":[69],"representation":[72],"directly":[73],"is":[74],"difficult,":[75],"because":[76],"must":[79],"automatically":[80],"figure":[81],"out":[82],"most":[84],"to":[87,149,165,169],"memorize":[88],"at":[89],"each":[90],"time":[91],"step.":[92],"We":[93,174],"show":[94,189],"that,":[95],"by":[96],"decomposing":[97],"search":[100],"problem":[101],"into":[102],"trajectory":[104,131],"optimization":[105,132],"phase":[106,111,133,161],"supervised":[109,159],"through":[112],"called":[115],"guided":[116],"search,":[118],"acquire":[121],"effective":[124],"memorization":[125,167],"recall":[127],"strategies.":[128],"Intuitively,":[129],"chooses":[134],"values":[136],"will":[142],"make":[143],"it":[144],"easier":[145],"produce":[150,170],"right":[152],"in":[154,183],"future":[155],"states,":[156],"while":[157],"encourages":[162],"use":[166],"actions":[168],"those":[171],"states.":[173],"evaluate":[175],"our":[176,191],"on":[178],"involving":[180],"manipulation":[184],"navigation":[186],"settings,":[187],"learn":[194],"complex":[195],"successfully":[198],"complete":[199],"range":[201],"require":[205],"memory.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":14},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":13},{"year":2016,"cited_by_count":3}],"updated_date":"2026-03-17T09:09:15.849793","created_date":"2025-10-10T00:00:00"}
