{"id":"https://openalex.org/W4415124287","doi":"https://doi.org/10.1109/tnnls.2025.3591838","title":"Generalizable Offline Multiobjective Reinforcement Learning via Preference-Conditioned Diffuser","display_name":"Generalizable Offline Multiobjective Reinforcement Learning via Preference-Conditioned Diffuser","publication_year":2025,"publication_date":"2025-10-13","ids":{"openalex":"https://openalex.org/W4415124287","doi":"https://doi.org/10.1109/tnnls.2025.3591838","pmid":"https://pubmed.ncbi.nlm.nih.gov/41082419"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2025.3591838","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3591838","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113216766","display_name":"Yuchen Xiao","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuchen Xiao","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0007-8604-3042","affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041768936","display_name":"Lei Yuan","orcid":"https://orcid.org/0000-0002-7803-0766"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Yuan","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-7803-0766","affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049063919","display_name":"Lihe Li","orcid":"https://orcid.org/0000-0002-7017-6488"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lihe Li","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102004367","display_name":"Ziqian Zhang","orcid":"https://orcid.org/0000-0001-6123-6373"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziqian Zhang","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108326018","display_name":"Yi-Chen Li","orcid":"https://orcid.org/0009-0004-9908-5303"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yichen Li","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0004-9908-5303","affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101458462","display_name":"Yang Yu","orcid":"https://orcid.org/0000-0002-1052-5447"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Yu","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-1052-5447","affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5113216766"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31799791,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"12","first_page":"20199","last_page":"20213"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12496","display_name":"Color perception and design","score":0.817799985408783,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12496","display_name":"Color perception and design","score":0.817799985408783,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.8021000027656555,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10838","display_name":"Animal Behavior and Welfare Studies","score":0.6912999749183655,"subfield":{"id":"https://openalex.org/subfields/3404","display_name":"Small Animals"},"field":{"id":"https://openalex.org/fields/34","display_name":"Veterinary"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7232999801635742},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6157000064849854},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.569100022315979},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.48969998955726624},{"id":"https://openalex.org/keywords/offline-learning","display_name":"Offline learning","score":0.45089998841285706},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.41190001368522644},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.3797999918460846}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7473000288009644},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7232999801635742},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6604999899864197},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6157000064849854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6104000210762024},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.569100022315979},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.48969998955726624},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.45089998841285706},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.41190001368522644},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.3797999918460846},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3619999885559082},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.3447999954223633},{"id":"https://openalex.org/C2780102126","wikidata":"https://www.wikidata.org/wiki/Q10928179","display_name":"Online and offline","level":2,"score":0.3330000042915344},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.32280001044273376},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.30959999561309814},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2915000021457672},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2565000057220459}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2025.3591838","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3591838","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:41082419","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41082419","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1234294696","display_name":null,"funder_award_id":"BK20243039","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"},{"id":"https://openalex.org/G576237104","display_name":null,"funder_award_id":"U24A20324","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5862880122","display_name":null,"funder_award_id":"62495093","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7959945567","display_name":null,"funder_award_id":"BK20241199","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W1968802531","https://openalex.org/W2119717200","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2954070046","https://openalex.org/W2962894046","https://openalex.org/W3107370826","https://openalex.org/W3109467707","https://openalex.org/W3127561923","https://openalex.org/W3134062350","https://openalex.org/W3152878473","https://openalex.org/W3174886282","https://openalex.org/W3197491224","https://openalex.org/W3200592679","https://openalex.org/W3216656735","https://openalex.org/W4214717370","https://openalex.org/W4250482878","https://openalex.org/W4287266177","https://openalex.org/W4288257146","https://openalex.org/W4297964528","https://openalex.org/W4307823382","https://openalex.org/W4321483843","https://openalex.org/W4360884927","https://openalex.org/W4366336804","https://openalex.org/W4376171493","https://openalex.org/W4385245566","https://openalex.org/W4387195417","https://openalex.org/W4396609072","https://openalex.org/W4400363791","https://openalex.org/W4402968332","https://openalex.org/W4403614620","https://openalex.org/W4403615048","https://openalex.org/W4406265015","https://openalex.org/W4406354674","https://openalex.org/W4407361523","https://openalex.org/W4408352406","https://openalex.org/W4408565215","https://openalex.org/W4415800404"],"related_works":[],"abstract_inverted_index":{"Multiobjective":[0],"reinforcement":[1],"learning":[2,12,105],"(MORL)":[3],"addresses":[4],"sequential":[5],"decision-making":[6],"problems":[7],"with":[8,25],"multiple":[9],"objectives":[10],"by":[11,106],"policies":[13],"optimized":[14],"for":[15,42,76,143,169],"diverse":[16],"pReferences.":[17],"While":[18],"traditional":[19],"methods":[20],"necessitate":[21],"costly":[22],"online":[23],"interaction":[24],"the":[26,39,79,99,112,130,135,141,160],"environment,":[27],"recent":[28],"approaches":[29],"leverage":[30],"static":[31],"datasets":[32,142],"containing":[33],"precollected":[34],"trajectories,":[35],"making":[36],"offline":[37,47,94,178],"MORL":[38,48,68,144,179],"preferred":[40],"choice":[41],"real-world":[43],"applications.":[44],"However,":[45],"existing":[46],"techniques":[49],"suffer":[50],"from":[51],"limited":[52],"expressiveness":[53,81],"and":[54,82,102,133],"poor":[55],"generalization":[56,92,175],"on":[57,111,120,140,163],"out-of-distribution":[58],"(OOD)":[59],"preferences.":[60],"To":[61],"overcome":[62],"these":[63],"limitations,":[64],"we":[65],"propose":[66],"diffusion-based":[67,72],"(DiffMORL),":[69],"a":[70,121],"generalizable":[71],"planning":[73],"frame":[74],"work":[75],"MORL.":[77],"Leveraging":[78],"strong":[80],"generation":[83],"capability":[84],"of":[85,166],"diffusion":[86],"models,":[87],"DiffMORL":[88,115,149],"further":[89],"boosts":[90],"its":[91,173],"through":[93],"data":[95,107],"mixup,":[96],"which":[97],"mitigates":[98],"memorization":[100],"phenomenon":[101],"facilitates":[103],"feature":[104],"augmentation.":[108],"By":[109],"training":[110],"augmented":[113],"data,":[114],"is":[116],"able":[117],"to":[118,128],"condition":[119],"given":[122],"preference,":[123],"whether":[124],"in-distribution":[125],"or":[126],"OOD,":[127],"plan":[129],"desired":[131],"trajectory":[132],"extract":[134],"corresponding":[136],"action.":[137],"Evaluations":[138],"conducted":[139],"(D4MORL)":[145],"benchmark":[146],"demonstrate":[147],"that":[148],"achieves":[150],"state-of-the-art":[151],"results":[152],"across":[153],"nearly":[154],"all":[155],"tasks.":[156],"Notably,":[157],"it":[158],"surpasses":[159],"best":[161],"baseline":[162],"14":[164],"out":[165],"18":[167],"metrics":[168],"OOD":[170],"generalization,":[171],"underscoring":[172],"remarkable":[174],"ability":[176],"in":[177],"scenarios.":[180]},"counts_by_year":[],"updated_date":"2025-12-03T23:09:05.601824","created_date":"2025-10-14T00:00:00"}
