{"id":"https://openalex.org/W4401415366","doi":"https://doi.org/10.1109/icra57147.2024.10611123","title":"Guided Online Distillation: Promoting Safe Reinforcement Learning by Offline Demonstration","display_name":"Guided Online Distillation: Promoting Safe Reinforcement Learning by Offline Demonstration","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401415366","doi":"https://doi.org/10.1109/icra57147.2024.10611123"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10611123","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611123","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065279959","display_name":"Jinning Li","orcid":"https://orcid.org/0000-0002-7335-5553"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jinning Li","raw_affiliation_strings":["University of California,Berkeley,CA,USA"],"affiliations":[{"raw_affiliation_string":"University of California,Berkeley,CA,USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109000036","display_name":"Xinyi Liu","orcid":"https://orcid.org/0000-0002-2682-0088"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xinyi Liu","raw_affiliation_strings":["University of Michigan,Ann Arbor,MI,USA"],"affiliations":[{"raw_affiliation_string":"University of Michigan,Ann Arbor,MI,USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031613608","display_name":"Banghua Zhu","orcid":"https://orcid.org/0000-0002-7320-3533"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Banghua Zhu","raw_affiliation_strings":["University of California,Berkeley,CA,USA"],"affiliations":[{"raw_affiliation_string":"University of California,Berkeley,CA,USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034192173","display_name":"Jiantao Jiao","orcid":"https://orcid.org/0000-0003-3766-8031"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiantao Jiao","raw_affiliation_strings":["University of California,Berkeley,CA,USA"],"affiliations":[{"raw_affiliation_string":"University of California,Berkeley,CA,USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064077634","display_name":"Masayoshi Tomizuka","orcid":"https://orcid.org/0000-0003-0206-6639"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Masayoshi Tomizuka","raw_affiliation_strings":["University of California,Berkeley,CA,USA"],"affiliations":[{"raw_affiliation_string":"University of California,Berkeley,CA,USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101867083","display_name":"Chen Tang","orcid":"https://orcid.org/0000-0002-7536-9983"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chen Tang","raw_affiliation_strings":["University of California,Berkeley,CA,USA"],"affiliations":[{"raw_affiliation_string":"University of California,Berkeley,CA,USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031565603","display_name":"Wei Zhan","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Zhan","raw_affiliation_strings":["University of California,Berkeley,CA,USA"],"affiliations":[{"raw_affiliation_string":"University of California,Berkeley,CA,USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5065279959"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":1.0878,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.80874137,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"7447","last_page":"7454"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8000857830047607},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.7511568069458008},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.69657301902771},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.44697415828704834},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4303407073020935},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.42402321100234985},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.17267683148384094},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.08379536867141724},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.07385927438735962}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8000857830047607},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.7511568069458008},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.69657301902771},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44697415828704834},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4303407073020935},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.42402321100234985},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.17267683148384094},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.08379536867141724},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.07385927438735962},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10611123","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611123","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W2027579135","https://openalex.org/W2604382266","https://openalex.org/W2736601468","https://openalex.org/W2781726626","https://openalex.org/W2913756371","https://openalex.org/W2955189650","https://openalex.org/W2962515681","https://openalex.org/W2962894046","https://openalex.org/W3009331570","https://openalex.org/W3035574168","https://openalex.org/W3037804676","https://openalex.org/W3090747022","https://openalex.org/W3124514933","https://openalex.org/W3150718622","https://openalex.org/W3156216502","https://openalex.org/W3169291081","https://openalex.org/W3184258555","https://openalex.org/W3187722890","https://openalex.org/W3196685263","https://openalex.org/W3203076355","https://openalex.org/W3205367325","https://openalex.org/W3205794883","https://openalex.org/W4220747123","https://openalex.org/W4221070022","https://openalex.org/W4223423875","https://openalex.org/W4281550413","https://openalex.org/W4281716982","https://openalex.org/W4287756699","https://openalex.org/W4292779060","https://openalex.org/W4323927473","https://openalex.org/W4385245566","https://openalex.org/W4388903500","https://openalex.org/W4401415071","https://openalex.org/W6732837357","https://openalex.org/W6737893269","https://openalex.org/W6741002519","https://openalex.org/W6747473740","https://openalex.org/W6759225949","https://openalex.org/W6778883912","https://openalex.org/W6779656125","https://openalex.org/W6780312303","https://openalex.org/W6790978476","https://openalex.org/W6796289742","https://openalex.org/W6799150178","https://openalex.org/W6802363888","https://openalex.org/W6802659552","https://openalex.org/W6810450869","https://openalex.org/W6810653392","https://openalex.org/W6810667139","https://openalex.org/W6838188116","https://openalex.org/W6850790624","https://openalex.org/W6857244198","https://openalex.org/W6904861994"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Safe":[0],"Reinforcement":[1],"Learning":[2],"(RL)":[3],"aims":[4],"to":[5,25,59,67,81,107],"find":[6],"a":[7,64,153],"policy":[8,55,86,114,151,155,169],"that":[9,52,196],"achieves":[10],"high":[11],"rewards":[12],"while":[13],"satisfying":[14],"cost":[15],"constraints.":[16],"When":[17],"learning":[18],"from":[19,56],"scratch,":[20],"safe":[21,143,160,172,179],"RL":[22,144,161,173,180],"agents":[23],"tend":[24],"be":[26,82],"overly":[27],"conservative,":[28],"which":[29,100,163],"impedes":[30],"exploration":[31,62],"and":[32,170,182,203],"restrains":[33],"the":[34,69,105,118,166,188],"overall":[35],"performance.":[36],"In":[37],"many":[38],"realistic":[39],"tasks,":[40],"e.g.":[41,74],"autonomous":[42,130],"driving,":[43],"large-scale":[44],"expert":[45,54],"demonstration":[46],"data":[47,58,89],"are":[48],"available.":[49],"We":[50],"argue":[51],"extracting":[53],"offline":[57,85,149,167],"guide":[60],"online":[61,159,171],"is":[63],"promising":[65],"solution":[66],"mitigate":[68],"conserveness":[70],"issue.":[71],"Large-capacity":[72],"models,":[73],"decision":[75],"transformers":[76],"(DT),":[77],"have":[78],"been":[79],"proven":[80],"competent":[83],"in":[84,91,176,207],"learning.":[87],"However,":[88],"collected":[90],"realworld":[92],"scenarios":[93],"rarely":[94],"contain":[95],"dangerous":[96],"cases":[97],"(e.g.,":[98],"collisions),":[99],"makes":[101],"it":[102],"prohibitive":[103],"for":[104],"policies":[106,202],"learn":[108],"safety":[109],"concepts.":[110],"Besides,":[111],"these":[112],"bulk":[113],"networks":[115],"cannot":[116],"meet":[117],"computation":[119],"speed":[120],"requirements":[121],"at":[122],"inference":[123],"time":[124],"on":[125,187],"real-world":[126,183],"tasks":[127,181,185],"such":[128],"as":[129],"driving.":[131],"To":[132],"this":[133],"end,":[134],"we":[135],"propose":[136],"Guided":[137],"Online":[138],"Distillation":[139],"(GOLD),":[140],"an":[141,148],"offline-to-online":[142],"framework.":[145],"GOLD":[146,197],"distills":[147],"DT":[150,168],"into":[152],"lightweight":[154,201],"network":[156],"through":[157],"guided":[158],"training,":[162],"outperforms":[164],"both":[165,177],"algorithms.":[174],"Experiments":[175],"benchmark":[178],"driving":[184],"based":[186],"Waymo":[189],"Open":[190],"Motion":[191],"Dataset":[192],"(WOMD)":[193],"[1]":[194],"demonstrate":[195],"can":[198],"successfully":[199],"distill":[200],"solve":[204],"decision-making":[205],"problems":[206],"challenging":[208],"safety-critical":[209],"scenarios.":[210]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
