{"id":"https://openalex.org/W4312255334","doi":"https://doi.org/10.1109/iros47612.2022.9981286","title":"Analyzing and Overcoming Degradation in Warm-Start Reinforcement Learning","display_name":"Analyzing and Overcoming Degradation in Warm-Start Reinforcement Learning","publication_year":2022,"publication_date":"2022-10-23","ids":{"openalex":"https://openalex.org/W4312255334","doi":"https://doi.org/10.1109/iros47612.2022.9981286"},"language":"en","primary_location":{"id":"doi:10.1109/iros47612.2022.9981286","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros47612.2022.9981286","pdf_url":null,"source":{"id":"https://openalex.org/S4363607704","display_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062577955","display_name":"Benjamin Wexler","orcid":null},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Benjamin Wexler","raw_affiliation_strings":["Bar-Ilan University,Department of Computer Science,Ramat-Gan,Israel","Department of Computer Science, Bar-Ilan University, Ramat-Gan, Israel"],"affiliations":[{"raw_affiliation_string":"Bar-Ilan University,Department of Computer Science,Ramat-Gan,Israel","institution_ids":["https://openalex.org/I13955877"]},{"raw_affiliation_string":"Department of Computer Science, Bar-Ilan University, Ramat-Gan, Israel","institution_ids":["https://openalex.org/I13955877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024401360","display_name":"Elad Sarafian","orcid":"https://orcid.org/0000-0002-3271-6308"},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Elad Sarafian","raw_affiliation_strings":["Bar-Ilan University,Department of Computer Science,Ramat-Gan,Israel","Department of Computer Science, Bar-Ilan University, Ramat-Gan, Israel"],"affiliations":[{"raw_affiliation_string":"Bar-Ilan University,Department of Computer Science,Ramat-Gan,Israel","institution_ids":["https://openalex.org/I13955877"]},{"raw_affiliation_string":"Department of Computer Science, Bar-Ilan University, Ramat-Gan, Israel","institution_ids":["https://openalex.org/I13955877"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103213461","display_name":"Sarit Kraus","orcid":"https://orcid.org/0000-0003-4672-623X"},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Sarit Kraus","raw_affiliation_strings":["Bar-Ilan University,Department of Computer Science,Ramat-Gan,Israel","Department of Computer Science, Bar-Ilan University, Ramat-Gan, Israel"],"affiliations":[{"raw_affiliation_string":"Bar-Ilan University,Department of Computer Science,Ramat-Gan,Israel","institution_ids":["https://openalex.org/I13955877"]},{"raw_affiliation_string":"Department of Computer Science, Bar-Ilan University, Ramat-Gan, Israel","institution_ids":["https://openalex.org/I13955877"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5062577955"],"corresponding_institution_ids":["https://openalex.org/I13955877"],"apc_list":null,"apc_paid":null,"fwci":0.4174,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.59247483,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"4048","last_page":"4055"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9782999753952026,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.963699996471405,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.8203345537185669},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8072527050971985},{"id":"https://openalex.org/keywords/degradation","display_name":"Degradation (telecommunications)","score":0.6624094843864441},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6606942415237427},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.49400025606155396},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48526784777641296},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4485829174518585},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.43387675285339355},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.41993483901023865},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3662797808647156},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3121486306190491},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1868824064731598},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11276835203170776}],"concepts":[{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.8203345537185669},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8072527050971985},{"id":"https://openalex.org/C2779679103","wikidata":"https://www.wikidata.org/wiki/Q5251805","display_name":"Degradation (telecommunications)","level":2,"score":0.6624094843864441},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6606942415237427},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.49400025606155396},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48526784777641296},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4485829174518585},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.43387675285339355},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.41993483901023865},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3662797808647156},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3121486306190491},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1868824064731598},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11276835203170776},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros47612.2022.9981286","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros47612.2022.9981286","pdf_url":null,"source":{"id":"https://openalex.org/S4363607704","display_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1999874108","https://openalex.org/W2112552549","https://openalex.org/W2145339207","https://openalex.org/W2155027007","https://openalex.org/W2606868178","https://openalex.org/W2736601468","https://openalex.org/W2741122588","https://openalex.org/W2781726626","https://openalex.org/W2787938642","https://openalex.org/W2804930149","https://openalex.org/W2904453761","https://openalex.org/W2947150733","https://openalex.org/W2953981431","https://openalex.org/W2963099939","https://openalex.org/W2973229164","https://openalex.org/W2979211489","https://openalex.org/W2989847975","https://openalex.org/W2991355586","https://openalex.org/W2993185773","https://openalex.org/W2994722919","https://openalex.org/W2996037775","https://openalex.org/W3012291041","https://openalex.org/W3020712699","https://openalex.org/W3022566517","https://openalex.org/W3033324992","https://openalex.org/W3035367521","https://openalex.org/W3037625705","https://openalex.org/W3084269620","https://openalex.org/W3092499445","https://openalex.org/W3106539628","https://openalex.org/W3118210634","https://openalex.org/W3166795773","https://openalex.org/W3201629542","https://openalex.org/W4300799055","https://openalex.org/W6676726796","https://openalex.org/W6683204974","https://openalex.org/W6740801417","https://openalex.org/W6757469721","https://openalex.org/W6771428195","https://openalex.org/W6771876938","https://openalex.org/W6775041404","https://openalex.org/W6779265984","https://openalex.org/W6783140480","https://openalex.org/W6784247795","https://openalex.org/W6792049622"],"related_works":["https://openalex.org/W2140186469","https://openalex.org/W4390421286","https://openalex.org/W4280563792","https://openalex.org/W4389724018","https://openalex.org/W4318719684","https://openalex.org/W3183136280","https://openalex.org/W4318559728","https://openalex.org/W2775233965","https://openalex.org/W4360995913","https://openalex.org/W4281847915"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1,124],"(RL)":[2],"for":[3,126,181,198],"robotic":[4],"applications":[5],"can":[6,30],"benefit":[7],"from":[8,114],"a":[9,17,59,119,143,156,165,182],"warm-start":[10],"where":[11],"the":[12,35,47,56,72,79,88,108,135,147,150,174,203,211,221],"agent":[13],"is":[14,50],"initialized":[15],"with":[16],"pretrained":[18,48],"behavioral":[19,73,95],"policy.":[20],"However,":[21],"when":[22],"transitioning":[23],"to":[24,44,52,66,91,94,107,142,172,213,220],"RL":[25,110],"updates,":[26],"degradation":[27,82,131],"in":[28,55,71],"performance":[29,188,216],"occur,":[31],"which":[32,40,112],"may":[33],"compromise":[34],"robot's":[36,222],"safety.":[37,223],"This":[38],"degradation,":[39,175],"constitutes":[41],"an":[42],"inability":[43],"properly":[45],"utilize":[46],"policy,":[49],"attributed":[51],"extrapolation":[53],"error":[54],"value":[57],"function,":[58],"result":[60],"of":[61,81,101,146,167,206],"high":[62],"values":[63],"being":[64],"assigned":[65],"Out-Of-Distribution":[67],"actions":[68],"not":[69],"present":[70,98],"policy's":[74],"data.":[75],"We":[76,97,117,163],"investigate":[77,164],"why":[78,87],"magnitude":[80],"varies":[83],"across":[84],"policies":[85],"and":[86,104,138,176,189,217],"policy":[89,136],"fails":[90],"quickly":[92],"return":[93],"performance.":[96],"visual":[99],"confirmation":[100],"our":[102],"analysis":[103],"draw":[105],"comparisons":[106],"Offline":[109],"setting":[111],"suffers":[113],"similar":[115],"difficulties.":[116],"propose":[118,155],"novel":[120,157],"method,":[121],"Confidence":[122],"Constrained":[123],"(CCL)":[125],"Warm-Start":[127],"RL,":[128],"that":[129,170,195],"reduces":[130],"by":[132],"balancing":[133],"between":[134,186,215],"gradient":[137],"constrained":[139,151],"learning":[140,152],"according":[141],"confidence":[144],"measure":[145],"Q-values.":[148],"For":[149],"component":[153],"we":[154],"objective,":[158],"Positive":[159],"Q-value":[160],"Distance":[161],"(CCL-PQD).":[162],"variety":[166],"constraint-based":[168],"methods":[169],"aim":[171],"overcome":[173],"find":[177],"they":[178],"constitute":[179],"solutions":[180,201],"multi-objective":[183,208],"optimization":[184],"problem":[185],"maximimal":[187],"miniminal":[190],"degradation.":[191],"Our":[192],"results":[193],"demonstrate":[194],"hyperparameter":[196],"tuning":[197],"CCL-PQD":[199],"produces":[200],"on":[202],"Pareto":[204],"Front":[205],"this":[207],"problem,":[209],"allowing":[210],"user":[212],"balance":[214],"tolerable":[218],"compromises":[219]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2025-10-10T00:00:00"}
