{"id":"https://openalex.org/W4409882754","doi":"https://doi.org/10.1109/lra.2025.3564780","title":"Enhancing Exploration With Diffusion Policies in Hybrid Off-Policy RL: Application to Non-Prehensile Manipulation","display_name":"Enhancing Exploration With Diffusion Policies in Hybrid Off-Policy RL: Application to Non-Prehensile Manipulation","publication_year":2025,"publication_date":"2025-04-28","ids":{"openalex":"https://openalex.org/W4409882754","doi":"https://doi.org/10.1109/lra.2025.3564780"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2025.3564780","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2025.3564780","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107853525","display_name":"Huy Le","orcid":null},"institutions":[{"id":"https://openalex.org/I889804353","display_name":"Robert Bosch (Germany)","ror":"https://ror.org/01fe0jt45","country_code":"DE","type":"company","lineage":["https://openalex.org/I889804353"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Huy Le","raw_affiliation_strings":["Bosch Center for Artificial Intelligence, Renningen, Germany"],"raw_orcid":"https://orcid.org/0009-0005-0162-2084","affiliations":[{"raw_affiliation_string":"Bosch Center for Artificial Intelligence, Renningen, Germany","institution_ids":["https://openalex.org/I889804353"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082503814","display_name":"Tai Hoang","orcid":null},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tai Hoang","raw_affiliation_strings":["Institute for Anthropomatics and Robotics, Karlsruhe Institute of Technology, Karlsruhe, Germany"],"raw_orcid":"https://orcid.org/0009-0008-9080-8105","affiliations":[{"raw_affiliation_string":"Institute for Anthropomatics and Robotics, Karlsruhe Institute of Technology, Karlsruhe, Germany","institution_ids":["https://openalex.org/I102335020"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Miroslav Gabriel","orcid":"https://orcid.org/0009-0002-6454-777X"},"institutions":[{"id":"https://openalex.org/I889804353","display_name":"Robert Bosch (Germany)","ror":"https://ror.org/01fe0jt45","country_code":"DE","type":"company","lineage":["https://openalex.org/I889804353"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Miroslav Gabriel","raw_affiliation_strings":["Bosch Center for Artificial Intelligence, Renningen, Germany"],"raw_orcid":"https://orcid.org/0009-0002-6454-777X","affiliations":[{"raw_affiliation_string":"Bosch Center for Artificial Intelligence, Renningen, Germany","institution_ids":["https://openalex.org/I889804353"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110467801","display_name":"Gerhard Neumann","orcid":"https://orcid.org/0000-0002-5483-4225"},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gerhard Neumann","raw_affiliation_strings":["Institute for Anthropomatics and Robotics, Karlsruhe Institute of Technology, Karlsruhe, Germany"],"raw_orcid":"https://orcid.org/0000-0002-5483-4225","affiliations":[{"raw_affiliation_string":"Institute for Anthropomatics and Robotics, Karlsruhe Institute of Technology, Karlsruhe, Germany","institution_ids":["https://openalex.org/I102335020"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043775193","display_name":"Ngo Anh Vien","orcid":"https://orcid.org/0000-0001-9646-267X"},"institutions":[{"id":"https://openalex.org/I889804353","display_name":"Robert Bosch (Germany)","ror":"https://ror.org/01fe0jt45","country_code":"DE","type":"company","lineage":["https://openalex.org/I889804353"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ngo Anh Vien","raw_affiliation_strings":["Bosch Center for Artificial Intelligence, Renningen, Germany"],"raw_orcid":"https://orcid.org/0000-0001-9646-267X","affiliations":[{"raw_affiliation_string":"Bosch Center for Artificial Intelligence, Renningen, Germany","institution_ids":["https://openalex.org/I889804353"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5107853525"],"corresponding_institution_ids":["https://openalex.org/I889804353"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08919344,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"10","issue":"6","first_page":"6143","last_page":"6150"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11749","display_name":"Iterative Learning Control Systems","score":0.8669999837875366,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11749","display_name":"Iterative Learning Control Systems","score":0.8669999837875366,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14083","display_name":"Extremum Seeking Control Systems","score":0.859499990940094,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11601","display_name":"Neuroscience and Neural Engineering","score":0.836899995803833,"subfield":{"id":"https://openalex.org/subfields/2804","display_name":"Cellular and Molecular Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prehensile-tail","display_name":"Prehensile tail","score":0.9817090630531311},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.49255093932151794},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.37375307083129883},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09729981422424316},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.0907948911190033},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.06490102410316467}],"concepts":[{"id":"https://openalex.org/C136380597","wikidata":"https://www.wikidata.org/wiki/Q10508905","display_name":"Prehensile tail","level":2,"score":0.9817090630531311},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.49255093932151794},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.37375307083129883},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09729981422424316},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0907948911190033},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.06490102410316467},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2025.3564780","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2025.3564780","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W2109169869","https://openalex.org/W2158782408","https://openalex.org/W2918308866","https://openalex.org/W2963030226","https://openalex.org/W3120441392","https://openalex.org/W3196603227","https://openalex.org/W3199614037","https://openalex.org/W3208348590","https://openalex.org/W4321607911","https://openalex.org/W4385403811","https://openalex.org/W4385430497","https://openalex.org/W4393241407","https://openalex.org/W4402354160","https://openalex.org/W6747473740","https://openalex.org/W6748839928","https://openalex.org/W6750186571","https://openalex.org/W6756675569","https://openalex.org/W6765775151","https://openalex.org/W6779823529","https://openalex.org/W6782766965","https://openalex.org/W6796589144","https://openalex.org/W6800570120","https://openalex.org/W6841549819","https://openalex.org/W6843816287","https://openalex.org/W6846435682","https://openalex.org/W6849187823","https://openalex.org/W6850614898","https://openalex.org/W6850975553","https://openalex.org/W6851885053","https://openalex.org/W6851959672","https://openalex.org/W6852002412","https://openalex.org/W6852572940","https://openalex.org/W6853146131","https://openalex.org/W6856443129","https://openalex.org/W6857214445","https://openalex.org/W6857467272","https://openalex.org/W6858967177","https://openalex.org/W6861230427","https://openalex.org/W6861688831","https://openalex.org/W6861787071","https://openalex.org/W6862624746","https://openalex.org/W6869200280"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2465876097","https://openalex.org/W2028231052","https://openalex.org/W2768832826","https://openalex.org/W3166169123","https://openalex.org/W2010045274","https://openalex.org/W2552641899","https://openalex.org/W2105329304"],"abstract_inverted_index":{"Learning":[0],"diverse":[1,148],"policies":[2],"for":[3,8,160],"non-prehensile":[4],"manipulation":[5],"is":[6,81,91,110,176],"essential":[7],"improving":[9],"skill":[10],"transfer":[11],"and":[12,36,52,69,130,137],"generalization":[13],"to":[14,101,152,165],"out-of-distribution":[15],"scenarios.":[16],"In":[17],"this":[18,56],"work,":[19],"we":[20,41,54],"enhance":[21],"exploration":[22],"through":[23,83],"a":[24,29,49,58,94,102,113,168],"two-":[25],"fold":[26],"approach":[27,99],"within":[28],"hybrid":[30,98],"framework":[31,63],"that":[32,64,144],"tackles":[33],"both":[34,66,135],"discrete":[35,68,73],"continuous":[37,44,70,89],"action":[38,74],"spaces.":[39],"First,":[40],"model":[42],"the":[43,67,88,106,122],"motion":[45],"parameter":[46],"policy":[47],"as":[48,77,112],"diffusion":[50],"model,":[51],"second,":[53],"incorporate":[55],"into":[57],"maximum":[59,107],"entropy":[60,108],"reinforcement":[61],"learning":[62],"unifies":[65],"components.":[71],"The":[72],"space,":[75],"such":[76],"contact":[78],"point":[79],"selection,":[80],"optimized":[82],"Q-value":[84],"function":[85],"maximization,":[86],"while":[87],"part":[90],"guided":[92],"by":[93],"diffusion-based":[95],"policy.":[96],"This":[97],"leads":[100],"principled":[103],"objective,":[104],"where":[105],"term":[109],"derived":[111],"lower":[114],"bound":[115],"using":[116],"structured":[117],"variational":[118],"inference.":[119],"We":[120],"propose":[121],"Hybrid":[123],"Diffusion":[124],"Policy":[125],"algorithm":[126],"(<bold":[127],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[128,180],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">hydo</b>)":[129],"evaluate":[131],"its":[132],"performance":[133],"on":[134,167],"simulation":[136],"zero-shot":[138],"sim2real":[139],"tasks.":[140],"Our":[141],"results":[142],"show":[143],"HyDo":[145],"encourages":[146],"more":[147],"behavior":[149],"policies,":[150],"leading":[151],"significantly":[153],"improved":[154],"success":[155],"rates":[156],"across":[157],"tasks":[158],"-":[159],"example,":[161],"increasing":[162],"from":[163],"53%":[164],"72%":[166],"real-world":[169],"6D":[170],"pose":[171],"alignment":[172],"task.":[173],"Project":[174],"page":[175],"available":[177],"at":[178],"<uri":[179],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://leh2rng.github.io/hydo</uri>.":[181]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
