{"id":"https://openalex.org/W2972522277","doi":"https://doi.org/10.1109/ijcnn48605.2020.9207344","title":"Learning Transferable Domain Priors for Safe Exploration in Reinforcement Learning","display_name":"Learning Transferable Domain Priors for Safe Exploration in Reinforcement Learning","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W2972522277","doi":"https://doi.org/10.1109/ijcnn48605.2020.9207344","mag":"2972522277"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn48605.2020.9207344","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9207344","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1909.04307","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043916805","display_name":"Thommen George Karimpanal","orcid":"https://orcid.org/0000-0001-8918-3314"},"institutions":[{"id":"https://openalex.org/I149704539","display_name":"Deakin University","ror":"https://ror.org/02czsnj07","country_code":"AU","type":"education","lineage":["https://openalex.org/I149704539"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Thommen George Karimpanal","raw_affiliation_strings":["Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia"],"affiliations":[{"raw_affiliation_string":"Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","institution_ids":["https://openalex.org/I149704539"]},{"raw_affiliation_string":"Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia","institution_ids":["https://openalex.org/I149704539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024215125","display_name":"Santu Rana","orcid":"https://orcid.org/0000-0003-2247-850X"},"institutions":[{"id":"https://openalex.org/I149704539","display_name":"Deakin University","ror":"https://ror.org/02czsnj07","country_code":"AU","type":"education","lineage":["https://openalex.org/I149704539"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Santu Rana","raw_affiliation_strings":["Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia"],"affiliations":[{"raw_affiliation_string":"Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","institution_ids":["https://openalex.org/I149704539"]},{"raw_affiliation_string":"Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia","institution_ids":["https://openalex.org/I149704539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011012522","display_name":"Sunil Gupta","orcid":"https://orcid.org/0000-0002-4669-9940"},"institutions":[{"id":"https://openalex.org/I149704539","display_name":"Deakin University","ror":"https://ror.org/02czsnj07","country_code":"AU","type":"education","lineage":["https://openalex.org/I149704539"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Sunil Gupta","raw_affiliation_strings":["Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia"],"affiliations":[{"raw_affiliation_string":"Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","institution_ids":["https://openalex.org/I149704539"]},{"raw_affiliation_string":"Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia","institution_ids":["https://openalex.org/I149704539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085471517","display_name":"Truyen Tran","orcid":"https://orcid.org/0000-0001-6531-8907"},"institutions":[{"id":"https://openalex.org/I149704539","display_name":"Deakin University","ror":"https://ror.org/02czsnj07","country_code":"AU","type":"education","lineage":["https://openalex.org/I149704539"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Truyen Tran","raw_affiliation_strings":["Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia"],"affiliations":[{"raw_affiliation_string":"Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","institution_ids":["https://openalex.org/I149704539"]},{"raw_affiliation_string":"Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia","institution_ids":["https://openalex.org/I149704539"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045540854","display_name":"Svetha Venkatesh","orcid":"https://orcid.org/0000-0001-8675-6631"},"institutions":[{"id":"https://openalex.org/I149704539","display_name":"Deakin University","ror":"https://ror.org/02czsnj07","country_code":"AU","type":"education","lineage":["https://openalex.org/I149704539"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Svetha Venkatesh","raw_affiliation_strings":["Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia"],"affiliations":[{"raw_affiliation_string":"Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","institution_ids":["https://openalex.org/I149704539"]},{"raw_affiliation_string":"Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia","institution_ids":["https://openalex.org/I149704539"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5043916805"],"corresponding_institution_ids":["https://openalex.org/I149704539"],"apc_list":null,"apc_paid":null,"fwci":0.1371,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.52027835,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/safer","display_name":"SAFER","score":0.9218677282333374},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8540182113647461},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7326493859291077},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.6673951745033264},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.6582682132720947},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6114678382873535},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5430876016616821},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5250127911567688},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3261752426624298},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.14865627884864807},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.126553475856781},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.06180599331855774}],"concepts":[{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.9218677282333374},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8540182113647461},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7326493859291077},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6673951745033264},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.6582682132720947},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6114678382873535},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5430876016616821},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5250127911567688},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3261752426624298},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.14865627884864807},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.126553475856781},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.06180599331855774},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1109/ijcnn48605.2020.9207344","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9207344","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1909.04307","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.04307","pdf_url":"https://arxiv.org/pdf/1909.04307","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2972522277","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1909.04307v5","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:dro.deakin.edu.au:DU:30146997","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401102","display_name":"Own your potential (DEAKIN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149704539","host_organization_name":"Deakin University","host_organization_lineage":["https://openalex.org/I149704539"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"},{"id":"pmh:oai:figshare.com:article/20684563","is_oa":true,"landing_page_url":"https://figshare.com/articles/conference_contribution/Learning_Transferable_Domain_Priors_for_Safe_Exploration_in_Reinforcement_Learning/20684563","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"doi:10.48550/arxiv.1909.04307","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1909.04307","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1909.04307","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.04307","pdf_url":"https://arxiv.org/pdf/1909.04307","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.46000000834465027,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2972522277.pdf","grobid_xml":"https://content.openalex.org/works/W2972522277.grobid-xml"},"referenced_works_count":53,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1757796397","https://openalex.org/W1840625103","https://openalex.org/W1845972764","https://openalex.org/W1949804828","https://openalex.org/W2031727428","https://openalex.org/W2097381042","https://openalex.org/W2114901408","https://openalex.org/W2121863487","https://openalex.org/W2131600418","https://openalex.org/W2145339207","https://openalex.org/W2159459871","https://openalex.org/W2173248099","https://openalex.org/W2257979135","https://openalex.org/W2736601468","https://openalex.org/W2768908787","https://openalex.org/W2782656435","https://openalex.org/W2789517807","https://openalex.org/W2797734773","https://openalex.org/W2926028278","https://openalex.org/W2951871955","https://openalex.org/W2962717849","https://openalex.org/W2962803570","https://openalex.org/W2963065769","https://openalex.org/W2963400359","https://openalex.org/W2963575966","https://openalex.org/W2963771109","https://openalex.org/W2964043796","https://openalex.org/W3104013016","https://openalex.org/W4241521318","https://openalex.org/W6631190155","https://openalex.org/W6637967152","https://openalex.org/W6639175102","https://openalex.org/W6674600207","https://openalex.org/W6677370284","https://openalex.org/W6677916085","https://openalex.org/W6683149736","https://openalex.org/W6684921986","https://openalex.org/W6692846177","https://openalex.org/W6718190810","https://openalex.org/W6737893269","https://openalex.org/W6741002519","https://openalex.org/W6744794241","https://openalex.org/W6746721349","https://openalex.org/W6748089505","https://openalex.org/W6748306599","https://openalex.org/W6748440607","https://openalex.org/W6748595559","https://openalex.org/W6749992314","https://openalex.org/W6752187413","https://openalex.org/W6755612348","https://openalex.org/W6780559895","https://openalex.org/W6999387429"],"related_works":["https://openalex.org/W326419249","https://openalex.org/W2619583400","https://openalex.org/W2897007337","https://openalex.org/W3096684011","https://openalex.org/W1607218107","https://openalex.org/W2097498341","https://openalex.org/W2996274873","https://openalex.org/W3091476481","https://openalex.org/W2981322556","https://openalex.org/W2998135952","https://openalex.org/W2097113539","https://openalex.org/W2854769010","https://openalex.org/W2949969799","https://openalex.org/W3092535550","https://openalex.org/W2948199445","https://openalex.org/W2585047082","https://openalex.org/W2996110979","https://openalex.org/W2990181595","https://openalex.org/W3131852878","https://openalex.org/W2774756601"],"abstract_inverted_index":{"Prior":[0],"access":[1],"to":[2,30,56,63,81,105,128,141],"domain":[3],"knowledge":[4],"could":[5,18,158],"significantly":[6],"improve":[7],"the":[8,72,93,99,133,148],"performance":[9],"of":[10,46,95,154],"a":[11,44,58,122,138],"reinforcement":[12],"learning":[13,94,127],"agent.":[14],"In":[15,35,61],"particular,":[16],"it":[17,120],"help":[19],"agents":[20],"avoid":[21],"potentially":[22],"catastrophic":[23],"exploratory":[24,66,124],"actions,":[25],"which":[26,157],"would":[27],"otherwise":[28],"have":[29],"be":[31,86,160],"experienced":[32],"during":[33],"learning.":[34],"this":[36,155],"work,":[37],"we":[38,74],"identify":[39],"consistently":[40],"undesirable":[41],"actions":[42],"in":[43,68,71,90,98,109,132,162],"set":[45],"previously":[47],"learned":[48,87],"tasks,":[49],"and":[50,84,89,117,145,150],"use":[51],"pseudo-rewards":[52],"associated":[53],"with":[54,92],"them":[55],"learn":[57],"prior":[59],"policy.":[60],"addition":[62],"enabling":[64],"safer":[65,123],"behaviors":[67],"subsequent":[69],"tasks":[70,97,131],"domain,":[73],"show":[75],"that":[76,119],"these":[77,143],"priors":[78],"are":[79],"transferable":[80],"similar":[82],"environments,":[83,116],"can":[85],"off-policy":[88],"parallel":[91],"other":[96],"domain.":[100,134],"We":[101,135],"compare":[102],"our":[103],"approach":[104],"established,":[106],"state-of-the-art":[107],"algorithms":[108],"both":[110],"discrete":[111],"as":[112,114],"well":[113],"continuous":[115],"demonstrate":[118],"exhibits":[121],"behavior":[125],"while":[126],"perform":[129],"arbitrary":[130],"also":[136,159],"present":[137],"theoretical":[139],"analysis":[140],"support":[142],"results,":[144],"briefly":[146],"discuss":[147],"implications":[149],"some":[151],"alternative":[152],"formulations":[153],"approach,":[156],"useful":[161],"certain":[163],"scenarios.":[164]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
