{"id":"https://openalex.org/W4405778737","doi":"https://doi.org/10.1109/lra.2024.3522842","title":"DASP: Hierarchical Offline Reinforcement Learning via Diffusion Autodecoder and Skill Primitive","display_name":"DASP: Hierarchical Offline Reinforcement Learning via Diffusion Autodecoder and Skill Primitive","publication_year":2024,"publication_date":"2024-12-25","ids":{"openalex":"https://openalex.org/W4405778737","doi":"https://doi.org/10.1109/lra.2024.3522842"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2024.3522842","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3522842","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Sicheng Liu","orcid":"https://orcid.org/0009-0007-9866-6431"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sicheng Liu","raw_affiliation_strings":["School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"],"raw_orcid":"https://orcid.org/0009-0007-9866-6431","affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081598860","display_name":"Yunchuan Zhang","orcid":"https://orcid.org/0000-0002-5532-602X"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunchuan Zhang","raw_affiliation_strings":["School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045361189","display_name":"Wenbai Chen","orcid":"https://orcid.org/0000-0001-7683-2776"},"institutions":[{"id":"https://openalex.org/I78675632","display_name":"Beijing Information Science & Technology University","ror":"https://ror.org/04xnqep60","country_code":"CN","type":"education","lineage":["https://openalex.org/I78675632"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbai Chen","raw_affiliation_strings":["School of Automation, Beijing Information Science and Technology University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7683-2776","affiliations":[{"raw_affiliation_string":"School of Automation, Beijing Information Science and Technology University, Beijing, China","institution_ids":["https://openalex.org/I78675632"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029620144","display_name":"Peiliang Wu","orcid":"https://orcid.org/0000-0003-1228-2757"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peiliang Wu","raw_affiliation_strings":["School of Information Science and Engineering, Yanshan University, Qinhuangdao, China"],"raw_orcid":"https://orcid.org/0000-0003-1228-2757","affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20698338,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"10","issue":"2","first_page":"1649","last_page":"1655"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7986000180244446,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7986000180244446,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6050645709037781},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5283566117286682},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4896679222583771},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4664302170276642},{"id":"https://openalex.org/keywords/cognitive-science","display_name":"Cognitive science","score":0.3841540217399597},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3356810510158539},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.21529725193977356},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.05009141564369202},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.041618287563323975}],"concepts":[{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6050645709037781},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5283566117286682},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4896679222583771},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4664302170276642},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.3841540217399597},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3356810510158539},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.21529725193977356},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.05009141564369202},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.041618287563323975},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2024.3522842","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3522842","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1331161382","display_name":null,"funder_award_id":"U20A20167","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1484327127","display_name":null,"funder_award_id":"62276028","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G453775460","display_name":null,"funder_award_id":"F202103079","funder_id":"https://openalex.org/F4320322163","funder_display_name":"Natural Science Foundation of Hebei Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322163","display_name":"Natural Science Foundation of Hebei Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W3217030260","https://openalex.org/W4319663730","https://openalex.org/W4360584316","https://openalex.org/W4366782941","https://openalex.org/W4392815494","https://openalex.org/W4396910019","https://openalex.org/W4402753827","https://openalex.org/W6757469721","https://openalex.org/W6763704811","https://openalex.org/W6765775151","https://openalex.org/W6776601253","https://openalex.org/W6779265984","https://openalex.org/W6783713337","https://openalex.org/W6785193213","https://openalex.org/W6791413555","https://openalex.org/W6796289742","https://openalex.org/W6796589144","https://openalex.org/W6796880758","https://openalex.org/W6802659552","https://openalex.org/W6810488170","https://openalex.org/W6838356327","https://openalex.org/W6838483015","https://openalex.org/W6842971753","https://openalex.org/W6851885053","https://openalex.org/W6857125719","https://openalex.org/W6859530174"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Offline":[0],"reinforcement":[1,25,83],"learning":[2,26,84,120,146],"strives":[3],"to":[4,7,55,62,112,137],"enable":[5],"agents":[6],"effectively":[8],"utilize":[9],"pre-collected":[10],"offline":[11,17,82,95,145],"datasets":[12],"for":[13,68,100,129],"learning.":[14,103],"Such":[15,117],"an":[16,107],"setup":[18],"tremendously":[19],"mitigates":[20],"the":[21,46,56,63,124,130,144],"problems":[22],"of":[23,65,126],"online":[24],"algorithms":[27],"in":[28,32,75,169],"real-world":[29],"applications,":[30],"particularly":[31],"scenarios":[33],"where":[34],"interactions":[35],"are":[36],"constrained":[37],"or":[38],"exploration":[39],"is":[40,134],"costly.":[41],"The":[42],"learned":[43],"strategy,":[44,58],"on":[45,154],"other":[47],"hand,":[48],"has":[49],"a":[50,80,118],"distributional":[51],"bias":[52],"with":[53],"respect":[54],"behavioral":[57],"which":[59,133],"consequently":[60],"leads":[61],"problem":[64],"extrapolation":[66],"error":[67],"out-of-distribution":[69],"actions.":[70],"To":[71],"mitigate":[72],"this":[73,76],"problem,":[74],"paper,":[77],"we":[78,105],"adopt":[79],"hierarchical":[81],"framework":[85],"that":[86,163],"extracts":[87],"recurrent":[88],"and":[89,151,160],"spatio-temporally":[90],"extended":[91],"primitive":[92],"skills":[93],"from":[94],"data":[96],"before":[97],"using":[98],"them":[99],"downstream":[101],"task":[102,140],"Besides,":[104],"introduce":[106],"autodecoder":[108],"conditional":[109],"diffusion":[110],"model":[111,122],"characterize":[113],"low-level":[114],"strategy":[115,131],"decoding.":[116],"deep":[119],"generative":[121],"enables":[123],"reduction":[125],"action":[127],"primitives":[128,142],"space,":[132],"then":[135],"used":[136],"learn":[138],"high-level":[139],"strategy-guided":[141],"via":[143],"algorithm":[147],"IQL.":[148],"Experimental":[149],"results":[150],"ablation":[152],"studies":[153],"D4RL":[155],"benchmark":[156],"tasks":[157],"(Antmaze,":[158],"Adroit":[159],"Kitchen)":[161],"demonstrate":[162],"our":[164],"approach":[165],"achieves":[166],"SOTA":[167],"performance":[168],"most":[170],"tasks.":[171]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
