{"id":"https://openalex.org/W7125957354","doi":"https://doi.org/10.1109/smc58881.2025.11342806","title":"PDD: Planning Offline Meta-RL with Prompt Decision Diffuser","display_name":"PDD: Planning Offline Meta-RL with Prompt Decision Diffuser","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125957354","doi":"https://doi.org/10.1109/smc58881.2025.11342806"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11342806","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342806","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124137198","display_name":"Shilin Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shilin Zhang","raw_affiliation_strings":["Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China","institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039055022","display_name":"Zican Hu","orcid":"https://orcid.org/0000-0002-6369-5730"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zican Hu","raw_affiliation_strings":["Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China","institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124098111","display_name":"Wenhao Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhao Wu","raw_affiliation_strings":["Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China","institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080760430","display_name":"Xinyan Huang","orcid":"https://orcid.org/0000-0002-0584-8452"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyi Xie","raw_affiliation_strings":["Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China","institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124119039","display_name":"Jiangxiang Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiangxiang Tang","raw_affiliation_strings":["Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China","institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123532488","display_name":"Zhi Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi Wang","raw_affiliation_strings":["Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,Department of Control Science and Intelligent Engineering,Nanjing,China","institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5124137198"],"corresponding_institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.87247938,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4171","last_page":"4175"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.32899999618530273,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.32899999618530273,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.31679999828338623,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.13809999823570251,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7846999764442444},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7616000175476074},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.699999988079071},{"id":"https://openalex.org/keywords/offline-learning","display_name":"Offline learning","score":0.5687999725341797},{"id":"https://openalex.org/keywords/diffuser","display_name":"Diffuser (optics)","score":0.45089998841285706},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3564000129699707},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.32670000195503235},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.31949999928474426}],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7846999764442444},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7616000175476074},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.699999988079071},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6656000018119812},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6065000295639038},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.5687999725341797},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49000000953674316},{"id":"https://openalex.org/C2780565730","wikidata":"https://www.wikidata.org/wiki/Q5275430","display_name":"Diffuser (optics)","level":3,"score":0.45089998841285706},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3564000129699707},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.32670000195503235},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.31949999928474426},{"id":"https://openalex.org/C28901747","wikidata":"https://www.wikidata.org/wiki/Q177571","display_name":"Decision theory","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.29820001125335693},{"id":"https://openalex.org/C113336015","wikidata":"https://www.wikidata.org/wiki/Q574010","display_name":"Complete information","level":2,"score":0.296999990940094},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.29100000858306885},{"id":"https://openalex.org/C59594135","wikidata":"https://www.wikidata.org/wiki/Q5249242","display_name":"Decision model","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.2630999982357025},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11342806","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342806","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.78792804479599}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W2158782408","https://openalex.org/W4312933868","https://openalex.org/W4415796837"],"related_works":[],"abstract_inverted_index":{"Inspired":[0],"by":[1,59],"diffusion":[2],"models":[3,33],"that":[4],"revolutionized":[5],"image":[6,29],"generation":[7],"through":[8],"language":[9,26],"conditioning,":[10],"offline":[11,54],"reinforcement":[12,56],"learning":[13,57],"(RL)":[14],"has":[15],"been":[16],"reformulated":[17],"as":[18,62],"a":[19,63],"sequence":[20,32,64],"modeling":[21,65],"problem.":[22],"Similar":[23],"to":[24,38,40,83,102],"how":[25],"descriptions":[27],"guide":[28,84],"generation,":[30],"RL":[31],"require":[34],"task-specific":[35,81],"conditioning":[36],"information":[37,82],"generalize":[39],"new":[41],"tasks.":[42],"We":[43],"propose":[44],"Prompt":[45],"Decision":[46],"Diffuser":[47],"(PDD),":[48],"addressing":[49],"the":[50],"generalization":[51,99],"challenge":[52],"in":[53],"meta":[55],"(OMRL)":[58],"treating":[60],"it":[61],"problem,":[66],"with":[67],"demonstration-based":[68],"prompting":[69],"for":[70],"few-shot":[71,77,98],"adaptation.":[72],"These":[73],"prompts,":[74],"encoded":[75],"from":[76],"demonstrations,":[78],"effectively":[79],"capture":[80],"cross-task":[85],"policy":[86],"generation.":[87],"Experimental":[88],"results":[89],"on":[90],"Mujoco":[91],"and":[92],"Point-Robot":[93],"benchmarks":[94],"demonstrate":[95],"PDD's":[96],"superior":[97],"capabilities":[100],"compared":[101],"baseline":[103],"approaches.":[104]},"counts_by_year":[],"updated_date":"2026-01-29T23:17:01.242718","created_date":"2026-01-29T00:00:00"}
