{"id":"https://openalex.org/W4388406310","doi":"https://doi.org/10.1007/s10994-023-06458-y","title":"Balancing policy constraint and ensemble size in uncertainty-based offline reinforcement learning","display_name":"Balancing policy constraint and ensemble size in uncertainty-based offline reinforcement learning","publication_year":2023,"publication_date":"2023-11-06","ids":{"openalex":"https://openalex.org/W4388406310","doi":"https://doi.org/10.1007/s10994-023-06458-y"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-023-06458-y","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-023-06458-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06458-y.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06458-y.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070296371","display_name":"Alex Beeson","orcid":null},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Alex Beeson","raw_affiliation_strings":["WMG, University of Warwick, Coventry, UK","Warwick Medical School, University of Warwick, Coventry, UK"],"affiliations":[{"raw_affiliation_string":"WMG, University of Warwick, Coventry, UK","institution_ids":["https://openalex.org/I39555362"]},{"raw_affiliation_string":"Warwick Medical School, University of Warwick, Coventry, UK","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010581004","display_name":"Giovanni Montana","orcid":"https://orcid.org/0000-0003-3942-3900"},"institutions":[{"id":"https://openalex.org/I4210128584","display_name":"The Alan Turing Institute","ror":"https://ror.org/035dkdb55","country_code":"GB","type":"facility","lineage":["https://openalex.org/I4210128584"]},{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Giovanni Montana","raw_affiliation_strings":["Alan Turing Institute, London, UK","Department of Statistics, University of Warwick, Coventry, UK","WMG, University of Warwick, Coventry, UK"],"affiliations":[{"raw_affiliation_string":"Alan Turing Institute, London, UK","institution_ids":["https://openalex.org/I4210128584"]},{"raw_affiliation_string":"Department of Statistics, University of Warwick, Coventry, UK","institution_ids":["https://openalex.org/I39555362"]},{"raw_affiliation_string":"WMG, University of Warwick, Coventry, UK","institution_ids":["https://openalex.org/I39555362"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5010581004"],"corresponding_institution_ids":["https://openalex.org/I39555362","https://openalex.org/I4210128584"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.8914,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.79778348,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"113","issue":"1","first_page":"443","last_page":"488"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9769999980926514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9764000177383423,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8311097621917725},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7575124502182007},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7027822732925415},{"id":"https://openalex.org/keywords/offline-learning","display_name":"Offline learning","score":0.5877702832221985},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5660812854766846},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.5159690976142883},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.5037214159965515},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4933125078678131},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44512343406677246},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4302581548690796},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.4153357744216919},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.39144206047058105},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.2222667932510376},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15375512838363647}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8311097621917725},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7575124502182007},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7027822732925415},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.5877702832221985},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5660812854766846},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.5159690976142883},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.5037214159965515},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4933125078678131},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44512343406677246},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4302581548690796},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.4153357744216919},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.39144206047058105},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.2222667932510376},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15375512838363647},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10994-023-06458-y","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-023-06458-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06458-y.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10994-023-06458-y","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-023-06458-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06458-y.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1361938442","display_name":null,"funder_award_id":"Fellowship","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3228942994","display_name":"Turing AI Fellowship: Advancing Multi-Agent Deep Reinforcement Learning for Sequential Decision Making in Real-World Applications","funder_award_id":"EP/V024868/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G361948359","display_name":null,"funder_award_id":"EPSRC EP","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4160449099","display_name":null,"funder_award_id":"EP/V024868/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320314731","display_name":"UK Research and Innovation","ror":"https://ror.org/001aqnf71"},{"id":"https://openalex.org/F4320320279","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4388406310.pdf"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W192920577","https://openalex.org/W2131600418","https://openalex.org/W2158782408","https://openalex.org/W2188365844","https://openalex.org/W2559655401","https://openalex.org/W2761873684","https://openalex.org/W2781726626","https://openalex.org/W2787938642","https://openalex.org/W2890803796","https://openalex.org/W2896893468","https://openalex.org/W2904453761","https://openalex.org/W3014596384","https://openalex.org/W3025019304","https://openalex.org/W3041764008","https://openalex.org/W3102100346","https://openalex.org/W3127561923","https://openalex.org/W3129049565","https://openalex.org/W3132227397","https://openalex.org/W3134774296","https://openalex.org/W3136208045","https://openalex.org/W3164731060","https://openalex.org/W3170016383","https://openalex.org/W3170697419","https://openalex.org/W3203827806","https://openalex.org/W3210146484","https://openalex.org/W3216656735","https://openalex.org/W4212774754","https://openalex.org/W4221158443","https://openalex.org/W4236654676","https://openalex.org/W4281794919","https://openalex.org/W4297838928","https://openalex.org/W4383109513","https://openalex.org/W6606882031","https://openalex.org/W6677916085","https://openalex.org/W6685664872","https://openalex.org/W6767334564"],"related_works":["https://openalex.org/W4225619808","https://openalex.org/W4388926065","https://openalex.org/W4386160446","https://openalex.org/W4387545330","https://openalex.org/W4226042081","https://openalex.org/W4284974072","https://openalex.org/W4376223516","https://openalex.org/W4283712691","https://openalex.org/W4308935744","https://openalex.org/W3097615968"],"abstract_inverted_index":{"Abstract":[0],"Offline":[1],"reinforcement":[2],"learning":[3,32],"agents":[4,16,49],"seek":[5],"optimal":[6],"policies":[7,53],"from":[8,26],"fixed":[9],"data":[10],"sets.":[11],"With":[12],"environmental":[13],"interaction":[14],"prohibited,":[15],"face":[17],"significant":[18,70],"challenges":[19],"in":[20,23,69],"preventing":[21],"errors":[22],"value":[24,46],"estimates":[25],"compounding":[27],"and":[28,89,97],"subsequently":[29],"causing":[30],"the":[31,59,78,90],"process":[33],"to":[34,50,64],"collapse.":[35],"Uncertainty":[36],"estimation":[37],"using":[38],"ensembles":[39,63],"compensates":[40],"for":[41,61,86,147],"this":[42,74],"by":[43],"penalising":[44],"high-variance":[45],"estimates,":[47],"allowing":[48,146],"learn":[51],"robust":[52],"based":[54],"on":[55,130],"data-driven":[56],"actions.":[57],"However,":[58],"requirement":[60],"large":[62],"facilitate":[65,141],"sufficient":[66,111],"penalisation":[67,112],"results":[68],"computational":[71,124],"overhead.":[72],"In":[73],"work,":[75],"we":[76,107,134],"examine":[77],"role":[79],"of":[80,95],"policy":[81,105,149],"constraints":[82],"as":[83],"a":[84,117],"mechanism":[85],"regulating":[87],"uncertainty,":[88],"corresponding":[91],"balance":[92],"between":[93],"level":[94],"constraint":[96],"ensemble":[98,120],"size.":[99],"By":[100],"incorporating":[101],"behavioural":[102],"cloning":[103],"into":[104],"updates,":[106],"show":[108,135],"empirically":[109],"that":[110],"can":[113,140],"be":[114],"achieved":[115],"with":[116],"much":[118],"smaller":[119],"size,":[121],"substantially":[122],"reducing":[123],"demand":[125],"while":[126,151],"retaining":[127],"state-of-the-art":[128],"performance":[129,154],"benchmarking":[131],"tasks.":[132],"Furthermore,":[133],"how":[136],"such":[137],"an":[138],"approach":[139],"stable":[142],"online":[143],"fine":[144],"tuning,":[145],"continued":[148],"improvement":[150],"avoiding":[152],"severe":[153],"drops.":[155]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
