{"id":"https://openalex.org/W2968340082","doi":"https://doi.org/10.1109/icra.2019.8794102","title":"Dexterous Manipulation with Deep Reinforcement Learning: Efficient, General, and Low-Cost","display_name":"Dexterous Manipulation with Deep Reinforcement Learning: Efficient, General, and Low-Cost","publication_year":2019,"publication_date":"2019-05-01","ids":{"openalex":"https://openalex.org/W2968340082","doi":"https://doi.org/10.1109/icra.2019.8794102","mag":"2968340082"},"language":"en","primary_location":{"id":"doi:10.1109/icra.2019.8794102","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2019.8794102","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009899534","display_name":"Henry Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Henry Zhu","raw_affiliation_strings":["UC Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017906439","display_name":"Abhishek Gupta","orcid":"https://orcid.org/0000-0003-1117-325X"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abhishek Gupta","raw_affiliation_strings":["UC Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040732794","display_name":"Aravind Rajeswaran","orcid":null},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aravind Rajeswaran","raw_affiliation_strings":["University of Washington"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sergey Levine","raw_affiliation_strings":["UC Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101582318","display_name":"Vikash Kumar","orcid":"https://orcid.org/0000-0001-6422-6066"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vikash Kumar","raw_affiliation_strings":["Google Brain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Brain","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":15.1752,"has_fulltext":false,"cited_by_count":172,"citation_normalized_percentile":{"value":0.99355017,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3651","last_page":"3657"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10784","display_name":"Muscle activation and electromyography studies","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8560630083084106},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7607613801956177},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.658011794090271},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6451840996742249},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6343805193901062},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5526309609413147},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.5518796443939209},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.4372754693031311},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.4248321056365967},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4216485619544983},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.41832929849624634},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.33386749029159546},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09497636556625366}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8560630083084106},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7607613801956177},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.658011794090271},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6451840996742249},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6343805193901062},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5526309609413147},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.5518796443939209},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.4372754693031311},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4248321056365967},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4216485619544983},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.41832929849624634},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.33386749029159546},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09497636556625366},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra.2019.8794102","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2019.8794102","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W1510186039","https://openalex.org/W1564897360","https://openalex.org/W1771410628","https://openalex.org/W1820657498","https://openalex.org/W2012392077","https://openalex.org/W2019165997","https://openalex.org/W2052863493","https://openalex.org/W2073408938","https://openalex.org/W2112036576","https://openalex.org/W2122331303","https://openalex.org/W2123967136","https://openalex.org/W2134167019","https://openalex.org/W2152247404","https://openalex.org/W2161697934","https://openalex.org/W2167117957","https://openalex.org/W2172968643","https://openalex.org/W2210408922","https://openalex.org/W2416041116","https://openalex.org/W2741122588","https://openalex.org/W2775954438","https://openalex.org/W2785962646","https://openalex.org/W2788862220","https://openalex.org/W2885163910","https://openalex.org/W2962749646","https://openalex.org/W2962957005","https://openalex.org/W2963099939","https://openalex.org/W2963170432","https://openalex.org/W2963184939","https://openalex.org/W2963276406","https://openalex.org/W2963376229","https://openalex.org/W2963411833","https://openalex.org/W2963649664","https://openalex.org/W2963713397","https://openalex.org/W2964173023","https://openalex.org/W2990747716","https://openalex.org/W4295549420","https://openalex.org/W4297818513","https://openalex.org/W4297824641","https://openalex.org/W6638018090","https://openalex.org/W6653435097","https://openalex.org/W6677049199","https://openalex.org/W6683526187","https://openalex.org/W6728252718","https://openalex.org/W6731094094","https://openalex.org/W6734129231","https://openalex.org/W6740023956","https://openalex.org/W6742461812","https://openalex.org/W6744563498","https://openalex.org/W6744564315","https://openalex.org/W6746914971","https://openalex.org/W6748645729","https://openalex.org/W6751082867"],"related_works":["https://openalex.org/W2475116013","https://openalex.org/W2770018148","https://openalex.org/W2358308169","https://openalex.org/W2385135707","https://openalex.org/W2140315382","https://openalex.org/W2059109728","https://openalex.org/W322691623","https://openalex.org/W2494989134","https://openalex.org/W2509444723","https://openalex.org/W2004958254"],"abstract_inverted_index":{"Dexterous":[0],"multi-fingered":[1,66,98,164],"robotic":[2,19],"hands":[3,23,99],"can":[4,100,148,167,184],"perform":[5,128],"a":[6,25,56,129,153,201],"wide":[7],"range":[8],"of":[9,36,123,131,156],"manipulation":[10,95,165],"skills,":[11],"making":[12],"them":[13],"an":[14,71],"appealing":[15],"component":[16],"for":[17,28,59,84,178],"general-purpose":[18],"manipulators.":[20],"However,":[21],"such":[22],"pose":[24],"major":[26],"challenge":[27],"autonomous":[29],"control,":[30],"due":[31],"to":[32,74,79,127,187,206],"the":[33,82,112,121,146,171,197],"high":[34],"dimensionality":[35],"their":[37],"configuration":[38],"space":[39],"and":[40,119,142,181,203,208],"complex":[41,163],"intermittent":[42],"contact":[43,62],"interactions.":[44],"In":[45],"this":[46,186],"work,":[47],"we":[48],"propose":[49],"deep":[50,108,193],"reinforcement":[51],"learning":[52,60,147],"(deep":[53],"RL)":[54],"as":[55],"scalable":[57],"solution":[58],"complex,":[61],"rich":[63],"behaviors":[64],"with":[65,97,106,115],"hands.":[67],"Deep":[68],"RL":[69,109,194],"provides":[70],"end-to-end":[72],"approach":[73],"directly":[75,104],"map":[76],"sensor":[77],"readings":[78],"actions,":[80],"without":[81,120],"need":[83],"task":[85],"specific":[86],"models":[87],"or":[88],"policy":[89],"classes.":[90],"We":[91,125],"show":[92],"that":[93,162,182,191],"contact-rich":[94],"behavior":[96],"be":[101,149,168],"learned":[102,169],"by":[103,151],"training":[105,195],"model-free":[107],"algorithms":[110],"in":[111,170,174,196],"real":[113,172,198],"world,":[114],"minimal":[116],"additional":[117],"assumption":[118],"aid":[122],"simulation.":[124],"learn":[126],"variety":[130],"tasks":[132],"on":[133],"two":[134],"different":[135],"low-cost":[136],"hardware":[137],"platforms":[138],"entirely":[139],"from":[140],"scratch,":[141],"further":[143],"study":[144],"how":[145],"accelerated":[150],"using":[152],"small":[154],"number":[155],"human":[157],"demonstrations.":[158],"Our":[159],"experiments":[160],"demonstrate":[161],"skills":[166],"world":[173,199],"about":[175],"4-7":[176],"hours":[177],"most":[179],"tasks,":[180],"demonstrations":[183],"decrease":[185],"2-3":[188],"hours,":[189],"indicating":[190],"direct":[192],"is":[200],"viable":[202],"practical":[204],"alternative":[205],"simulation":[207],"model-based":[209],"control.":[210],"https://":[211],"sites.google.com/view/deeprl-handmanipulation.":[212]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":24},{"year":2024,"cited_by_count":26},{"year":2023,"cited_by_count":30},{"year":2022,"cited_by_count":22},{"year":2021,"cited_by_count":28},{"year":2020,"cited_by_count":29},{"year":2019,"cited_by_count":11}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
