{"id":"https://openalex.org/W4390548073","doi":"https://doi.org/10.1145/3632410.3632423","title":"Guiding Offline Reinforcement Learning Using a Safety Expert","display_name":"Guiding Offline Reinforcement Learning Using a Safety Expert","publication_year":2024,"publication_date":"2024-01-03","ids":{"openalex":"https://openalex.org/W4390548073","doi":"https://doi.org/10.1145/3632410.3632423"},"language":"en","primary_location":{"id":"doi:10.1145/3632410.3632423","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3632410.3632423","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th Joint International Conference on Data Science &amp; Management of Data (11th ACM IKDD CODS and 29th COMAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044006036","display_name":"Richa Verma","orcid":"https://orcid.org/0000-0001-9579-0559"},"institutions":[{"id":"https://openalex.org/I55215948","display_name":"Tata Consultancy Services (India)","ror":"https://ror.org/01b9n8m42","country_code":"IN","type":"company","lineage":["https://openalex.org/I4210086519","https://openalex.org/I55215948"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Richa Verma","raw_affiliation_strings":["Tata Consultancy Services Ltd, India"],"affiliations":[{"raw_affiliation_string":"Tata Consultancy Services Ltd, India","institution_ids":["https://openalex.org/I55215948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010584534","display_name":"Durgesh Kalwar","orcid":"https://orcid.org/0000-0001-6392-7266"},"institutions":[{"id":"https://openalex.org/I55215948","display_name":"Tata Consultancy Services (India)","ror":"https://ror.org/01b9n8m42","country_code":"IN","type":"company","lineage":["https://openalex.org/I4210086519","https://openalex.org/I55215948"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Durgesh Kalwar","raw_affiliation_strings":["Tata Consultancy Services Ltd, India"],"affiliations":[{"raw_affiliation_string":"Tata Consultancy Services Ltd, India","institution_ids":["https://openalex.org/I55215948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045214135","display_name":"Harshad Khadilkar","orcid":"https://orcid.org/0000-0003-3601-778X"},"institutions":[{"id":"https://openalex.org/I55215948","display_name":"Tata Consultancy Services (India)","ror":"https://ror.org/01b9n8m42","country_code":"IN","type":"company","lineage":["https://openalex.org/I4210086519","https://openalex.org/I55215948"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Harshad Khadilkar","raw_affiliation_strings":["Tata Consultancy Services Ltd, India"],"affiliations":[{"raw_affiliation_string":"Tata Consultancy Services Ltd, India","institution_ids":["https://openalex.org/I55215948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009374923","display_name":"Balaraman Ravindran","orcid":"https://orcid.org/0000-0002-5364-7639"},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Balaraman Ravindran","raw_affiliation_strings":["Indian Institute of Technology Madras, India, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Madras, India, India","institution_ids":["https://openalex.org/I24676775"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5044006036"],"corresponding_institution_ids":["https://openalex.org/I55215948"],"apc_list":null,"apc_paid":null,"fwci":0.3475,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60517833,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"82","last_page":"90"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9614999890327454,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8378415107727051},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.775344967842102},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7526249289512634},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5614301562309265},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4867727756500244},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.443208783864975}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8378415107727051},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.775344967842102},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7526249289512634},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5614301562309265},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4867727756500244},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.443208783864975},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3632410.3632423","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3632410.3632423","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th Joint International Conference on Data Science &amp; Management of Data (11th ACM IKDD CODS and 29th COMAD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6200000047683716,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W112971866","https://openalex.org/W2399750036","https://openalex.org/W2575705757","https://openalex.org/W2788862220","https://openalex.org/W2963044034","https://openalex.org/W2963099939","https://openalex.org/W2998145016","https://openalex.org/W3033324992","https://openalex.org/W3036250605","https://openalex.org/W3185083385","https://openalex.org/W4225636568","https://openalex.org/W4246078117"],"related_works":["https://openalex.org/W2768698792","https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4286629047","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4313488044","https://openalex.org/W3209574120"],"abstract_inverted_index":{"Offline":[0],"reinforcement":[1],"learning":[2],"is":[3,12],"used":[4],"to":[5,16,44,94,99,135,151],"train":[6],"policies":[7],"in":[8,50,65,70,84,109,112,118,141,162],"situations":[9],"where":[10],"it":[11],"expensive":[13],"or":[14,194],"infeasible":[15],"access":[17],"the":[18,35,45,55,58,71,79,89,113,122,153,171,176,181,222],"environment":[19],"during":[20],"training.":[21],"An":[22],"agent":[23,59,104,174],"trained":[24],"under":[25],"such":[26,211],"a":[27,96,146,163,187,204],"scenario":[28],"does":[29],"not":[30],"get":[31],"corrective":[32],"feedback":[33],"once":[34],"learned":[36],"policy":[37],"starts":[38],"diverging":[39],"and":[40],"may":[41],"fall":[42],"prey":[43],"overestimation":[46],"bias":[47],"commonly":[48],"seen":[49],"this":[51,75,119],"setting.":[52],"This":[53],"increases":[54],"chances":[56],"of":[57,81,88,224],"choosing":[60,106],"potentially":[61],"unsafe":[62,226],"actions,":[63],"especially":[64],"states":[66,111,131,167],"with":[67,132,168],"insufficient":[68],"representation":[69],"training":[72,164],"dataset.":[73,114,165],"In":[74,166],"paper,":[76],"we":[77],"explore":[78],"problem":[80],"acting":[82],"safely":[83],"sparsely":[85],"observed":[86],"regions":[87],"state":[90,154],"space.":[91],"We":[92,144,207],"propose":[93],"leverage":[95],"safety":[97,123,177,205],"expert":[98],"nudge":[100],"an":[101,127],"offline":[102,128,172,197,213],"RL":[103,173,198,214],"towards":[105],"safe":[107],"actions":[108,227],"under-represented":[110],"The":[115],"proposed":[116],"framework":[117],"paper":[120],"transfers":[121],"expert\u2019s":[124],"knowledge":[125],"into":[126],"setting":[129],"for":[130],"high":[133,169],"uncertainty":[134,155],"prevent":[136],"catastrophic":[137],"failures":[138],"from":[139],"occurring":[140],"safety-critical":[142],"domains.":[143],"use":[145],"simple":[147],"but":[148],"effective":[149],"approach":[150,185],"quantify":[152],"based":[156],"on":[157],"how":[158],"frequently":[159],"they":[160],"appear":[161],"uncertainty,":[170],"mimics":[175],"expert,":[178],"otherwise":[179],"maximizing":[180],"long-term":[182],"reward.":[183],"Our":[184],"has":[186],"plug-and-play":[188],"nature,":[189],"i.e.,":[190],"any":[191],"existing":[192],"value-based":[193],"actor-critic":[195],"style":[196],"algorithm":[199],"can":[200,216],"be":[201],"guided":[202,212],"by":[203],"expert.":[206],"finally":[208],"show":[209],"that":[210],"algorithms":[215],"outperform":[217],"their":[218],"state-of-the-art":[219],"counterparts,":[220],"reducing":[221],"chance":[223],"taking":[225],"while":[228],"simultaneously":[229],"retaining":[230],"competitive":[231],"performance.":[232]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
