{"id":"https://openalex.org/W4319318414","doi":"https://doi.org/10.1007/s10514-023-10087-8","title":"An empowerment-based solution to robotic manipulation tasks with sparse rewards","display_name":"An empowerment-based solution to robotic manipulation tasks with sparse rewards","publication_year":2023,"publication_date":"2023-02-06","ids":{"openalex":"https://openalex.org/W4319318414","doi":"https://doi.org/10.1007/s10514-023-10087-8"},"language":"en","primary_location":{"id":"doi:10.1007/s10514-023-10087-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10514-023-10087-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10514-023-10087-8.pdf","source":{"id":"https://openalex.org/S144091109","display_name":"Autonomous Robots","issn_l":"0929-5593","issn":["0929-5593","1573-7527"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Robots","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10514-023-10087-8.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031618193","display_name":"Siyu Dai","orcid":"https://orcid.org/0000-0002-0924-3629"},"institutions":[{"id":"https://openalex.org/I126820664","display_name":"Vassar College","ror":"https://ror.org/022x6qg61","country_code":"US","type":"education","lineage":["https://openalex.org/I126820664"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Siyu Dai","raw_affiliation_strings":["Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology, 32 Vassar St, Cambridge, MA, 02139, USA"],"raw_orcid":"https://orcid.org/0000-0002-0924-3629","affiliations":[{"raw_affiliation_string":"Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology, 32 Vassar St, Cambridge, MA, 02139, USA","institution_ids":["https://openalex.org/I126820664","https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076156022","display_name":"Wei Xu","orcid":"https://orcid.org/0000-0002-2226-4096"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Xu","raw_affiliation_strings":["General AI Laboratory, Horizon Robotics, 10050 N Wolfe Rd, Cupertino, CA, 95014, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"General AI Laboratory, Horizon Robotics, 10050 N Wolfe Rd, Cupertino, CA, 95014, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074521402","display_name":"Andreas Hofmann","orcid":"https://orcid.org/0000-0002-4800-8429"},"institutions":[{"id":"https://openalex.org/I126820664","display_name":"Vassar College","ror":"https://ror.org/022x6qg61","country_code":"US","type":"education","lineage":["https://openalex.org/I126820664"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andreas Hofmann","raw_affiliation_strings":["Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology, 32 Vassar St, Cambridge, MA, 02139, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology, 32 Vassar St, Cambridge, MA, 02139, USA","institution_ids":["https://openalex.org/I126820664","https://openalex.org/I63966007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101709620","display_name":"Brian Williams","orcid":"https://orcid.org/0000-0002-1057-3940"},"institutions":[{"id":"https://openalex.org/I126820664","display_name":"Vassar College","ror":"https://ror.org/022x6qg61","country_code":"US","type":"education","lineage":["https://openalex.org/I126820664"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian Williams","raw_affiliation_strings":["Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology, 32 Vassar St, Cambridge, MA, 02139, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology, 32 Vassar St, Cambridge, MA, 02139, USA","institution_ids":["https://openalex.org/I126820664","https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5031618193"],"corresponding_institution_ids":["https://openalex.org/I126820664","https://openalex.org/I63966007"],"apc_list":{"value":2590,"currency":"EUR","value_usd":3390},"apc_paid":{"value":2590,"currency":"EUR","value_usd":3390},"fwci":1.3633,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.84005964,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"47","issue":"5","first_page":"617","last_page":"633"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11601","display_name":"Neuroscience and Neural Engineering","score":0.9713000059127808,"subfield":{"id":"https://openalex.org/subfields/2804","display_name":"Cellular and Molecular Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8424471616744995},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.741687536239624},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5697677731513977},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5291968584060669},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5151737928390503},{"id":"https://openalex.org/keywords/curiosity","display_name":"Curiosity","score":0.5150227546691895},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4761628806591034},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4683270752429962},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36724525690078735}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8424471616744995},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.741687536239624},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5697677731513977},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5291968584060669},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5151737928390503},{"id":"https://openalex.org/C33435437","wikidata":"https://www.wikidata.org/wiki/Q366791","display_name":"Curiosity","level":2,"score":0.5150227546691895},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4761628806591034},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4683270752429962},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36724525690078735},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10514-023-10087-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10514-023-10087-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10514-023-10087-8.pdf","source":{"id":"https://openalex.org/S144091109","display_name":"Autonomous Robots","issn_l":"0929-5593","issn":["0929-5593","1573-7527"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Robots","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10514-023-10087-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10514-023-10087-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10514-023-10087-8.pdf","source":{"id":"https://openalex.org/S144091109","display_name":"Autonomous Robots","issn_l":"0929-5593","issn":["0929-5593","1573-7527"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Robots","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4319318414.pdf"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1556911442","https://openalex.org/W1591713425","https://openalex.org/W2070683816","https://openalex.org/W2106164082","https://openalex.org/W2114771311","https://openalex.org/W2131940723","https://openalex.org/W2166944917","https://openalex.org/W2181068523","https://openalex.org/W2417786368","https://openalex.org/W2499343943","https://openalex.org/W2561776174","https://openalex.org/W2567070169","https://openalex.org/W2603088459","https://openalex.org/W2604763608","https://openalex.org/W2737215407","https://openalex.org/W2788741142","https://openalex.org/W2788781499","https://openalex.org/W2945774545","https://openalex.org/W2947638559","https://openalex.org/W2963523627","https://openalex.org/W2964227312","https://openalex.org/W2967518492","https://openalex.org/W2968917487","https://openalex.org/W3173212615","https://openalex.org/W4241085363","https://openalex.org/W6674851815","https://openalex.org/W6682849425","https://openalex.org/W6736021936","https://openalex.org/W6740801417","https://openalex.org/W6750106230","https://openalex.org/W6752051073","https://openalex.org/W6754957883","https://openalex.org/W6757021410","https://openalex.org/W6758641611","https://openalex.org/W6771329031","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W4285676344","https://openalex.org/W3094054656","https://openalex.org/W2123270665","https://openalex.org/W2060310955","https://openalex.org/W4382584175","https://openalex.org/W2284924956","https://openalex.org/W3043413210","https://openalex.org/W2613740288","https://openalex.org/W4383268304","https://openalex.org/W3196817267"],"abstract_inverted_index":{"Abstract":[0],"In":[1],"order":[2],"to":[3,9,20,70,93,120,157],"provide":[4],"adaptive":[5],"and":[6,65,82,85,126,163],"user-friendly":[7],"solutions":[8],"robotic":[10,68],"manipulation,":[11],"it":[12],"is":[13,118],"important":[14],"that":[15,54,134],"the":[16,36,110,123],"agent":[17],"can":[18,55,66,143],"learn":[19,71,146],"accomplish":[21],"tasks":[22,162],"even":[23],"if":[24],"they":[25],"are":[26,45],"only":[27,76],"provided":[28],"with":[29,75,105,137],"very":[30],"sparse":[31,77],"instruction":[32],"signals.":[33],"To":[34],"address":[35],"issues":[37],"reinforcement":[38,62],"learning":[39,63,166],"algorithms":[40],"face":[41],"when":[42,135],"task":[43,127],"rewards":[44],"sparse,":[46],"this":[47,87,141],"paper":[48],"proposes":[49],"an":[50],"intrinsic":[51,96,139],"motivation":[52],"approach":[53,88,117,142],"be":[56,155],"easily":[57],"integrated":[58],"into":[59],"any":[60],"standard":[61],"algorithm":[64],"allow":[67],"manipulators":[69,145],"useful":[72],"manipulation":[73,161],"skills":[74,151],"extrinsic":[78],"rewards.":[79],"Through":[80],"integrating":[81],"balancing":[83],"empowerment":[84],"curiosity,":[86],"shows":[89,133],"superior":[90],"performance":[91],"compared":[92],"other":[94,106,158],"state-of-the-art":[95],"exploration":[97,111,124],"approaches":[98],"during":[99],"extensive":[100],"empirical":[101],"testing.":[102],"When":[103],"combined":[104,136],"strategies":[107],"for":[108],"tackling":[109],"challenge,":[112],"e.g.":[113],"curriculum":[114],"learning,":[115],"our":[116],"able":[119],"further":[121],"improve":[122],"efficiency":[125],"success":[128],"rate.":[129],"Qualitative":[130],"analysis":[131],"also":[132],"diversity-driven":[138],"motivations,":[140],"help":[144],"a":[147],"set":[148],"of":[149],"diverse":[150],"which":[152],"could":[153],"potentially":[154],"applied":[156],"more":[159],"complicated":[160],"accelerate":[164],"their":[165],"process.":[167]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
