{"id":"https://openalex.org/W4390481298","doi":"https://doi.org/10.1109/humanoids57100.2023.10375194","title":"Estimator-Coupled Reinforcement Learning for Robust Purely Tactile In-Hand Manipulation","display_name":"Estimator-Coupled Reinforcement Learning for Robust Purely Tactile In-Hand Manipulation","publication_year":2023,"publication_date":"2023-12-12","ids":{"openalex":"https://openalex.org/W4390481298","doi":"https://doi.org/10.1109/humanoids57100.2023.10375194"},"language":"en","primary_location":{"id":"doi:10.1109/humanoids57100.2023.10375194","is_oa":false,"landing_page_url":"https://doi.org/10.1109/humanoids57100.2023.10375194","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE-RAS 22nd International Conference on Humanoid Robots (Humanoids)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047970921","display_name":"Lennart R\u00f6stel","orcid":null},"institutions":[{"id":"https://openalex.org/I83041830","display_name":"Deggendorf Institute of Technology","ror":"https://ror.org/02kw5st29","country_code":"DE","type":"education","lineage":["https://openalex.org/I83041830"]},{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Lennart R\u00f6stel","raw_affiliation_strings":["DLR Institute of Robotics and Mechatronics, Technical University of Munich and Deggendorf Institute of Technology"],"affiliations":[{"raw_affiliation_string":"DLR Institute of Robotics and Mechatronics, Technical University of Munich and Deggendorf Institute of Technology","institution_ids":["https://openalex.org/I83041830","https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075436631","display_name":"Johannes Pitz","orcid":null},"institutions":[{"id":"https://openalex.org/I83041830","display_name":"Deggendorf Institute of Technology","ror":"https://ror.org/02kw5st29","country_code":"DE","type":"education","lineage":["https://openalex.org/I83041830"]},{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Johannes Pitz","raw_affiliation_strings":["DLR Institute of Robotics and Mechatronics, Technical University of Munich and Deggendorf Institute of Technology"],"affiliations":[{"raw_affiliation_string":"DLR Institute of Robotics and Mechatronics, Technical University of Munich and Deggendorf Institute of Technology","institution_ids":["https://openalex.org/I83041830","https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043414969","display_name":"Leon Sievers","orcid":"https://orcid.org/0000-0001-6430-4618"},"institutions":[{"id":"https://openalex.org/I83041830","display_name":"Deggendorf Institute of Technology","ror":"https://ror.org/02kw5st29","country_code":"DE","type":"education","lineage":["https://openalex.org/I83041830"]},{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Leon Sievers","raw_affiliation_strings":["DLR Institute of Robotics and Mechatronics, Technical University of Munich and Deggendorf Institute of Technology"],"affiliations":[{"raw_affiliation_string":"DLR Institute of Robotics and Mechatronics, Technical University of Munich and Deggendorf Institute of Technology","institution_ids":["https://openalex.org/I83041830","https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058972548","display_name":"Berthold B\u00e4uml","orcid":"https://orcid.org/0000-0002-4545-4765"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]},{"id":"https://openalex.org/I83041830","display_name":"Deggendorf Institute of Technology","ror":"https://ror.org/02kw5st29","country_code":"DE","type":"education","lineage":["https://openalex.org/I83041830"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Berthold B\u00e4uml","raw_affiliation_strings":["DLR Institute of Robotics and Mechatronics, Technical University of Munich and Deggendorf Institute of Technology"],"affiliations":[{"raw_affiliation_string":"DLR Institute of Robotics and Mechatronics, Technical University of Munich and Deggendorf Institute of Technology","institution_ids":["https://openalex.org/I83041830","https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5047970921"],"corresponding_institution_ids":["https://openalex.org/I62916508","https://openalex.org/I83041830"],"apc_list":null,"apc_paid":null,"fwci":2.0659,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.87317193,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10784","display_name":"Muscle activation and electromyography studies","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8037325143814087},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.7227317094802856},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7163052558898926},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.6727762818336487},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6123719215393066},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5801404118537903},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.5598470568656921},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.48187074065208435},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4519479274749756},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12491145730018616},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11339837312698364},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11071133613586426}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8037325143814087},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.7227317094802856},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7163052558898926},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.6727762818336487},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6123719215393066},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5801404118537903},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.5598470568656921},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.48187074065208435},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4519479274749756},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12491145730018616},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11339837312698364},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11071133613586426},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/humanoids57100.2023.10375194","is_oa":false,"landing_page_url":"https://doi.org/10.1109/humanoids57100.2023.10375194","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE-RAS 22nd International Conference on Humanoid Robots (Humanoids)","raw_type":"proceedings-article"},{"id":"pmh:oai:elib.dlr.de:202624","is_oa":false,"landing_page_url":"https://elib.dlr.de/202624/","pdf_url":null,"source":{"id":"https://openalex.org/S4377196266","display_name":"elib (German Aerospace Center)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2898391981","host_organization_name":"Deutsches Zentrum f\u00fcr Luft- und Raumfahrt e. V. (DLR)","host_organization_lineage":["https://openalex.org/I2898391981"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Konferenzbeitrag"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1968917201","https://openalex.org/W2090149479","https://openalex.org/W2134167019","https://openalex.org/W2736601468","https://openalex.org/W2885163910","https://openalex.org/W2981030070","https://openalex.org/W2990747716","https://openalex.org/W3099587965","https://openalex.org/W4210590759","https://openalex.org/W4285047691","https://openalex.org/W4287637675","https://openalex.org/W4312410889","https://openalex.org/W4383108265","https://openalex.org/W4383109140","https://openalex.org/W4385403828","https://openalex.org/W4385430564","https://openalex.org/W4385430700","https://openalex.org/W6741002519","https://openalex.org/W6784458062","https://openalex.org/W6846634178"],"related_works":["https://openalex.org/W2905433371","https://openalex.org/W4390569940","https://openalex.org/W2888392564","https://openalex.org/W4361193272","https://openalex.org/W4310278675","https://openalex.org/W4388422664","https://openalex.org/W2806259446","https://openalex.org/W2963326959","https://openalex.org/W4247136043","https://openalex.org/W4312407344"],"abstract_inverted_index":{"This":[0,104],"paper":[1],"identifies":[2],"and":[3,13,72,75,112,156],"addresses":[4],"the":[5,23,34,40,57,70,73,77,92,96,117,153,169,176,182,190,227],"problems":[6],"with":[7,33,152],"naively":[8],"combining":[9,76],"(reinforcement)":[10],"learning-based":[11],"controllers":[12],"state":[14,59,65,97,110],"estimators":[15],"for":[16,63,157,202],"robotic":[17],"in-hand":[18,31],"manipulation.":[19],"Specifically,":[20],"we":[21,163],"tackle":[22],"challenging":[24,234],"task":[25,118],"of":[26,56,140,168,171,195,207,229],"purely":[27],"tactile,":[28],"goal-conditioned,":[29],"dextrous":[30],"reorientation":[32],"hand":[35],"pointing":[36],"downwards.":[37],"Due":[38],"to":[39,83,95,107,185,214,219],"limited":[41],"sensing":[42],"available,":[43],"many":[44],"control":[45,93],"strategies":[46],"that":[47],"are":[48],"feasible":[49],"in":[50,102,189,220,232],"simulation":[51,150],"when":[52],"having":[53],"full":[54],"knowledge":[55],"object's":[58],"do":[60],"not":[61],"allow":[62],"accurate":[64],"estimation.":[66],"Hence,":[67],"separately":[68],"training":[69,101,138],"controller":[71],"estimator":[74,98],"two":[78],"at":[79],"test":[80],"time":[81,139],"leads":[82,106],"poor":[84],"performance.":[85],"We":[86,174],"solve":[87],"this":[88,233],"problem":[89],"by":[90,180],"coupling":[91],"policy":[94,126],"already":[99],"during":[100],"simulation.":[103],"approach":[105],"more":[108],"robust":[109],"estimation":[111],"overall":[113],"higher":[114],"performance":[115,170],"on":[116,144],"while":[119],"maintaining":[120],"an":[121,165],"interpretability":[122],"advantage":[123],"over":[124],"end-to-end":[125],"learning.":[127],"With":[128],"our":[129,172,210],"GPU-accelerated":[130],"implementation,":[131],"learning":[132],"from":[133],"scratch":[134],"takes":[135],"a":[136,145,204,216],"median":[137,221],"only":[141],"6.5":[142],"hours":[143],"single,":[146],"low-cost":[147],"GPU.":[148],"In":[149],"experiments":[151],"DLR-Hand":[154],"II":[155],"four":[158,183],"significantly":[159],"different":[160],"object":[161],"shapes,":[162],"provide":[164],"in-depth":[166],"analysis":[167],"approach.":[173],"demonstrate":[175],"successful":[177],"sim2real":[178],"transfer":[179],"rotating":[181],"objects":[184],"all":[186],"24":[187],"orientations":[188],"<tex":[191],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[192],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\pi/2$</tex>":[193],"discretization":[194],"SO(3),":[196],"which":[197,224],"has":[198],"never":[199],"been":[200],"achieved":[201],"such":[203],"diverse":[205],"set":[206],"shapes.":[208],"Finally,":[209],"method":[211],"is":[212],"able":[213],"reorient":[215],"cube":[217],"consecutively":[218],"nine":[222],"goals,":[223],"was":[225],"beyond":[226],"reach":[228],"previous":[230],"methods":[231],"setting.":[235],"(Web:":[236],"https://dlr-alr.github.io/dlr-tactile-manipulation)":[237]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2024-01-02T00:00:00"}
