{"id":"https://openalex.org/W4391543889","doi":"https://doi.org/10.1007/s10994-023-06500-z","title":"Goal-conditioned offline reinforcement learning through state space partitioning","display_name":"Goal-conditioned offline reinforcement learning through state space partitioning","publication_year":2024,"publication_date":"2024-02-05","ids":{"openalex":"https://openalex.org/W4391543889","doi":"https://doi.org/10.1007/s10994-023-06500-z"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-023-06500-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-023-06500-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06500-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06500-z.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035818555","display_name":"Mianchu Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Mianchu Wang","raw_affiliation_strings":["University of Warwick, Coventry, UK"],"affiliations":[{"raw_affiliation_string":"University of Warwick, Coventry, UK","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115604796","display_name":"Yue Jin","orcid":"https://orcid.org/0000-0003-3291-2584"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yue Jin","raw_affiliation_strings":["University of Warwick, Coventry, UK"],"affiliations":[{"raw_affiliation_string":"University of Warwick, Coventry, UK","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010581004","display_name":"Giovanni Montana","orcid":"https://orcid.org/0000-0003-3942-3900"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]},{"id":"https://openalex.org/I4210128584","display_name":"The Alan Turing Institute","ror":"https://ror.org/035dkdb55","country_code":"GB","type":"facility","lineage":["https://openalex.org/I4210128584"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Giovanni Montana","raw_affiliation_strings":["Alan Turing Institute, London, UK","University of Warwick, Coventry, UK"],"affiliations":[{"raw_affiliation_string":"Alan Turing Institute, London, UK","institution_ids":["https://openalex.org/I4210128584"]},{"raw_affiliation_string":"University of Warwick, Coventry, UK","institution_ids":["https://openalex.org/I39555362"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5035818555"],"corresponding_institution_ids":["https://openalex.org/I39555362"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.3407,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60694309,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"113","issue":"5","first_page":"2435","last_page":"2465"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9409000277519226,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9036999940872192,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8943468332290649},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7631884813308716},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.7009607553482056},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.589126467704773},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5535979270935059},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.5327382683753967},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.43737709522247314},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10242190957069397}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8943468332290649},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7631884813308716},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.7009607553482056},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.589126467704773},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5535979270935059},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.5327382683753967},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.43737709522247314},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10242190957069397},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10994-023-06500-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-023-06500-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06500-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10994-023-06500-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-023-06500-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06500-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.800000011920929,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1361938442","display_name":null,"funder_award_id":"Fellowship","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3228942994","display_name":"Turing AI Fellowship: Advancing Multi-Agent Deep Reinforcement Learning for Sequential Decision Making in Real-World Applications","funder_award_id":"EP/V024868/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G361948359","display_name":null,"funder_award_id":"EPSRC EP","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4160449099","display_name":null,"funder_award_id":"EP/V024868/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8452545418","display_name":null,"funder_award_id":"unknown","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320314731","display_name":"UK Research and Innovation","ror":"https://ror.org/001aqnf71"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4391543889.pdf"},"referenced_works_count":47,"referenced_works":["https://openalex.org/W2145339207","https://openalex.org/W2160808139","https://openalex.org/W2173248099","https://openalex.org/W2616311403","https://openalex.org/W2787938642","https://openalex.org/W2789008106","https://openalex.org/W2892110489","https://openalex.org/W2904453761","https://openalex.org/W2911169848","https://openalex.org/W2978455699","https://openalex.org/W3007440580","https://openalex.org/W3016525976","https://openalex.org/W3028695364","https://openalex.org/W3033324992","https://openalex.org/W3034767611","https://openalex.org/W3090369311","https://openalex.org/W3109943994","https://openalex.org/W3156232699","https://openalex.org/W3165718346","https://openalex.org/W3172360140","https://openalex.org/W3173049816","https://openalex.org/W3208334305","https://openalex.org/W3216772467","https://openalex.org/W4212774754","https://openalex.org/W4214717370","https://openalex.org/W4226265259","https://openalex.org/W4281550413","https://openalex.org/W4283031789","https://openalex.org/W4283657141","https://openalex.org/W4285601030","https://openalex.org/W4309981656","https://openalex.org/W6616173779","https://openalex.org/W6631190155","https://openalex.org/W6635701881","https://openalex.org/W6677916085","https://openalex.org/W6685664872","https://openalex.org/W6718092244","https://openalex.org/W6740801417","https://openalex.org/W6745983339","https://openalex.org/W6751540476","https://openalex.org/W6753060773","https://openalex.org/W6763407092","https://openalex.org/W6765008394","https://openalex.org/W6767047803","https://openalex.org/W6767649332","https://openalex.org/W6787348534","https://openalex.org/W6797745118"],"related_works":["https://openalex.org/W2180954594","https://openalex.org/W2052835778","https://openalex.org/W2049003611","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W4297873223","https://openalex.org/W3009457412","https://openalex.org/W2350784623","https://openalex.org/W2126211886"],"abstract_inverted_index":{"Abstract":[0],"Offline":[1,160],"reinforcement":[2],"learning":[3,47],"(RL)":[4],"aims":[5],"to":[6,24,64,67,90,101,137,139,145,148,186],"create":[7],"policies":[8,49],"for":[9,44,96],"sequential":[10],"decision-making":[11],"using":[12,41],"exclusively":[13],"offline":[14,45,166],"datasets.":[15],"This":[16,71],"presents":[17],"a":[18,32,69,92,99,102,111,125,175],"significant":[19],"challenge,":[20],"especially":[21,74],"when":[22],"attempting":[23],"accomplish":[25],"multiple":[26],"distinct":[27],"goals":[28],"or":[29],"outcomes":[30],"within":[31],"given":[33],"scenario":[34],"while":[35],"receiving":[36],"sparse":[37],"rewards.":[38],"Prior":[39],"methods":[40],"advantage":[42],"weighting":[43,114],"goal-conditioned":[46],"improve":[48],"monotonically.":[50],"However,":[51],"they":[52],"still":[53],"face":[54],"challenges":[55],"from":[56,98],"distribution":[57],"shift":[58],"and":[59],"multi-modality":[60],"that":[61,116,142,178],"arise":[62],"due":[63],"conflicting":[65],"ways":[66],"reach":[68],"goal.":[70,104],"issue":[72],"is":[73,152],"challenging":[75],"in":[76,168],"long-horizon":[77],"tasks,":[78],"where":[79],"the":[80,129,132,149,179,187],"presence":[81],"of":[82,121,128,134],"multiple,":[83],"often":[84],"conflicting,":[85],"solutions":[86],"makes":[87],"it":[88],"hard":[89],"identify":[91],"single":[93],"optimal":[94],"policy":[95,181],"transitioning":[97],"state":[100,130],"desired":[103],"To":[105],"address":[106],"these":[107],"challenges,":[108],"we":[109,173],"introduce":[110],"complementary":[112],"advantage-based":[113],"scheme":[115],"incorporates":[117],"an":[118],"additional":[119],"source":[120],"inductive":[122],"bias.":[123],"Given":[124],"value-based":[126],"partitioning":[127],"space,":[131],"contribution":[133],"actions":[135],"expected":[136],"lead":[138],"target":[140],"regions":[141],"are":[143],"easier":[144],"reach,":[146],"compared":[147],"final":[150],"goal,":[151],"further":[153],"increased.":[154],"Our":[155],"proposed":[156],"approach,":[157],"Dual-Advantage":[158],"Weighted":[159],"Goal-conditioned":[161],"RL,":[162],"outperforms":[163],"several":[164],"competing":[165],"algorithms":[167],"widely":[169],"used":[170],"benchmarks.":[171],"Furthermore,":[172],"provide":[174],"theoretical":[176],"guarantee":[177],"learned":[180],"will":[182],"not":[183],"be":[184],"inferior":[185],"underlying":[188],"behavior":[189],"policy.":[190]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
