{"id":"https://openalex.org/W4297838928","doi":"https://doi.org/10.14428/esann/2022.es2022-110","title":"Adaptive Behavior Cloning Regularization for Stable Offline-to-Online Reinforcement Learning","display_name":"Adaptive Behavior Cloning Regularization for Stable Offline-to-Online Reinforcement Learning","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4297838928","doi":"https://doi.org/10.14428/esann/2022.es2022-110"},"language":"en","primary_location":{"id":"doi:10.14428/esann/2022.es2022-110","is_oa":true,"landing_page_url":"https://doi.org/10.14428/esann/2022.es2022-110","pdf_url":"https://doi.org/10.14428/esann/2022.es2022-110","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ESANN 2022 proceedings","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.14428/esann/2022.es2022-110","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100458737","display_name":"Yi Zhao","orcid":"https://orcid.org/0000-0003-2803-0933"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Yi Zhao","raw_affiliation_strings":["-Aalto University -Department of Electrical Engineering and Automation"],"affiliations":[{"raw_affiliation_string":"-Aalto University -Department of Electrical Engineering and Automation","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065850081","display_name":"Rinu Boney","orcid":"https://orcid.org/0000-0002-6968-7109"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Rinu Boney","raw_affiliation_strings":["-Aalto Universiity -Department of Computer Science -Finland"],"affiliations":[{"raw_affiliation_string":"-Aalto Universiity -Department of Computer Science -Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103933556","display_name":"Alexander Ilin","orcid":"https://orcid.org/0000-0001-6419-3006"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Alexander Ilin","raw_affiliation_strings":["-Aalto Universiity -Department of Computer Science -Finland"],"affiliations":[{"raw_affiliation_string":"-Aalto Universiity -Department of Computer Science -Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057931031","display_name":"Juho Kannala","orcid":"https://orcid.org/0000-0001-5088-4041"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Juho Kannala","raw_affiliation_strings":["-Aalto Universiity -Department of Computer Science -Finland"],"affiliations":[{"raw_affiliation_string":"-Aalto Universiity -Department of Computer Science -Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017983137","display_name":"Joni Pajarinen","orcid":"https://orcid.org/0000-0003-4469-8191"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]},{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE","FI"],"is_corresponding":false,"raw_author_name":"Joni Pajarinen","raw_affiliation_strings":["-Aalto University -Department of Electrical Engineering and Automation","-Technical University Darmstadt -Department of Computer Science -Germany"],"affiliations":[{"raw_affiliation_string":"-Aalto University -Department of Electrical Engineering and Automation","institution_ids":["https://openalex.org/I9927081"]},{"raw_affiliation_string":"-Technical University Darmstadt -Department of Computer Science -Germany","institution_ids":["https://openalex.org/I31512782"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100458737"],"corresponding_institution_ids":["https://openalex.org/I9927081"],"apc_list":null,"apc_paid":null,"fwci":1.7241,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86783371,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"545","last_page":"550"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9061907529830933},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7561690807342529},{"id":"https://openalex.org/keywords/offline-learning","display_name":"Offline learning","score":0.7204210758209229},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.678583025932312},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.5873006582260132},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5853638648986816},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5608709454536438},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5564539432525635},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5244688987731934},{"id":"https://openalex.org/keywords/online-and-offline","display_name":"Online and offline","score":0.5045894384384155},{"id":"https://openalex.org/keywords/cloning","display_name":"Cloning (programming)","score":0.49504175782203674},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4675076901912689},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.11826005578041077},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10290750861167908}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9061907529830933},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7561690807342529},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.7204210758209229},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.678583025932312},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.5873006582260132},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5853638648986816},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5608709454536438},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5564539432525635},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5244688987731934},{"id":"https://openalex.org/C2780102126","wikidata":"https://www.wikidata.org/wiki/Q10928179","display_name":"Online and offline","level":2,"score":0.5045894384384155},{"id":"https://openalex.org/C121050878","wikidata":"https://www.wikidata.org/wiki/Q5135020","display_name":"Cloning (programming)","level":2,"score":0.49504175782203674},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4675076901912689},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.11826005578041077},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10290750861167908},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14428/esann/2022.es2022-110","is_oa":true,"landing_page_url":"https://doi.org/10.14428/esann/2022.es2022-110","pdf_url":"https://doi.org/10.14428/esann/2022.es2022-110","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ESANN 2022 proceedings","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.14428/esann/2022.es2022-110","is_oa":true,"landing_page_url":"https://doi.org/10.14428/esann/2022.es2022-110","pdf_url":"https://doi.org/10.14428/esann/2022.es2022-110","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ESANN 2022 proceedings","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4297838928.pdf","grobid_xml":"https://content.openalex.org/works/W4297838928.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W192920577","https://openalex.org/W2787938642","https://openalex.org/W2904453761","https://openalex.org/W2952295663","https://openalex.org/W2995706821","https://openalex.org/W3009584650","https://openalex.org/W3016525976","https://openalex.org/W3022566517","https://openalex.org/W3033324992","https://openalex.org/W3121786643","https://openalex.org/W3172360140","https://openalex.org/W3208796191","https://openalex.org/W4287388225","https://openalex.org/W4287756699"],"related_works":["https://openalex.org/W4221145086","https://openalex.org/W4226221094","https://openalex.org/W4387293922","https://openalex.org/W4225619808","https://openalex.org/W4388482290","https://openalex.org/W4388872820","https://openalex.org/W4220668416","https://openalex.org/W4318719276","https://openalex.org/W92125585","https://openalex.org/W3207447243"],"abstract_inverted_index":{"Offline":[0],"reinforcement":[1,128],"learning,":[2],"by":[3,45,111],"learning":[4,117,129],"from":[5,68],"a":[6,96,113],"fixed":[7],"dataset,":[8,29],"makes":[9],"it":[10],"possible":[11],"to":[12,41,63,70,75,102],"learn":[13],"agent":[14,58],"behaviors":[15],"without":[16],"interacting":[17,46],"with":[18,47],"the":[19,24,27,48,53,56,64,78,87,105,122,132],"environment.":[20,49],"However,":[21],"depending":[22],"on":[23,86,131],"quality":[25],"of":[26,55,99,108,116],"offline":[28,69],"such":[30],"pre-trained":[31,57],"agents":[32],"may":[33,59],"have":[34],"limited":[35],"performance":[36,54,89,130],"and":[37,90],"would":[38],"further":[39,103],"need":[40],"be":[42],"fine-tuned":[43],"online":[44,51,71,83,109],"During":[50],"fine-tuning,":[52],"collapse":[60],"quickly":[61],"due":[62],"sudden":[65],"distribution":[66],"shift":[67],"data.":[72],"We":[73],"propose":[74],"adaptively":[76],"weigh":[77],"behavior":[79],"cloning":[80],"loss":[81],"during":[82],"fine-tuning":[84,110],"based":[85],"agent's":[88],"training":[91],"stability.":[92],"Moreover,":[93],"we":[94],"use":[95],"randomized":[97],"ensemble":[98],"Q":[100],"functions":[101],"increase":[104],"sample":[106],"efficiency":[107],"performing":[112],"large":[114],"number":[115],"updates.":[118],"Experiments":[119],"show":[120],"that":[121],"proposed":[123],"method":[124],"yields":[125],"state-of-the-art":[126],"offline-to-online":[127],"popular":[133],"D4RL":[134],"benchmark.":[135]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
