{"id":"https://openalex.org/W2968771440","doi":"https://doi.org/10.1007/s11229-021-03141-4","title":"Reward tampering problems and solutions in reinforcement learning: a causal influence diagram perspective","display_name":"Reward tampering problems and solutions in reinforcement learning: a causal influence diagram perspective","publication_year":2021,"publication_date":"2021-05-19","ids":{"openalex":"https://openalex.org/W2968771440","doi":"https://doi.org/10.1007/s11229-021-03141-4","mag":"2968771440"},"language":"en","primary_location":{"id":"doi:10.1007/s11229-021-03141-4","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s11229-021-03141-4","pdf_url":null,"source":{"id":"https://openalex.org/S255146","display_name":"Synthese","issn_l":"0039-7857","issn":["0039-7857","1573-0964"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Synthese","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1908.04734","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020224050","display_name":"Tom Everitt","orcid":"https://orcid.org/0000-0003-1210-9866"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["AU","GB","US"],"is_corresponding":true,"raw_author_name":"Tom Everitt","raw_affiliation_strings":["Australian National University, Canberra, ACT, Australia","DeepMind, London, UK","Google,,,,,"],"raw_orcid":"https://orcid.org/0000-0003-1210-9866","affiliations":[{"raw_affiliation_string":"Australian National University, Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"Google,,,,,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073944062","display_name":"Marcus H\u00fctter","orcid":"https://orcid.org/0000-0002-3263-4097"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["AU","GB","US"],"is_corresponding":false,"raw_author_name":"Marcus Hutter","raw_affiliation_strings":["Australian National University, Canberra, ACT, Australia","DeepMind, London, UK","Google,,,,,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian National University, Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"Google,,,,,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082761102","display_name":"Ramana Kumar","orcid":"https://orcid.org/0000-0002-2319-1933"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Ramana Kumar","raw_affiliation_strings":["DeepMind, London, UK","Google,,,,,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"Google,,,,,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052300917","display_name":"Victoria Krakovna","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Victoria Krakovna","raw_affiliation_strings":["DeepMind, London, UK","Google,,,,,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"Google,,,,,","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5020224050"],"corresponding_institution_ids":["https://openalex.org/I118347636","https://openalex.org/I1291425158","https://openalex.org/I4210090411"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":null,"fwci":1.9598,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.8821255,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"198","issue":"S27","first_page":"6435","last_page":"6467"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9545000195503235,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7984976768493652},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.7091585993766785},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6453805565834045},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5235043168067932},{"id":"https://openalex.org/keywords/bidding","display_name":"Bidding","score":0.5148783922195435},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4992678165435791},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4720645248889923},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4560035467147827},{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.4327782392501831},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1714840531349182},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.07052174210548401}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7984976768493652},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.7091585993766785},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6453805565834045},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5235043168067932},{"id":"https://openalex.org/C9233905","wikidata":"https://www.wikidata.org/wiki/Q3276328","display_name":"Bidding","level":2,"score":0.5148783922195435},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4992678165435791},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4720645248889923},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4560035467147827},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.4327782392501831},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1714840531349182},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.07052174210548401},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1007/s11229-021-03141-4","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s11229-021-03141-4","pdf_url":null,"source":{"id":"https://openalex.org/S255146","display_name":"Synthese","issn_l":"0039-7857","issn":["0039-7857","1573-0964"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Synthese","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1908.04734","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1908.04734","pdf_url":"https://arxiv.org/pdf/1908.04734","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2968771440","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/1908.04734","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:openresearch-repository.anu.edu.au:1885/305604","is_oa":false,"landing_page_url":"http://hdl.handle.net/1885/305604","pdf_url":null,"source":{"id":"https://openalex.org/S4306402539","display_name":"ANU Open Research (Australian National University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I118347636","host_organization_name":"Australian National University","host_organization_lineage":["https://openalex.org/I118347636"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Synthese","raw_type":"Journal article"},{"id":"doi:10.48550/arxiv.1908.04734","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1908.04734","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1908.04734","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1908.04734","pdf_url":"https://arxiv.org/pdf/1908.04734","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":102,"referenced_works":["https://openalex.org/W133789137","https://openalex.org/W145787216","https://openalex.org/W163024403","https://openalex.org/W591538471","https://openalex.org/W648152870","https://openalex.org/W1516659296","https://openalex.org/W1534384411","https://openalex.org/W1567742585","https://openalex.org/W1581742186","https://openalex.org/W1586718744","https://openalex.org/W1624823515","https://openalex.org/W1689629105","https://openalex.org/W1993091619","https://openalex.org/W1998754086","https://openalex.org/W2010936689","https://openalex.org/W2020595559","https://openalex.org/W2025440394","https://openalex.org/W2038908222","https://openalex.org/W2059214603","https://openalex.org/W2061562262","https://openalex.org/W2103561211","https://openalex.org/W2121863487","https://openalex.org/W2133752818","https://openalex.org/W2134067266","https://openalex.org/W2139774323","https://openalex.org/W2143891888","https://openalex.org/W2144240978","https://openalex.org/W2144349330","https://openalex.org/W2144863733","https://openalex.org/W2145339207","https://openalex.org/W2156869222","https://openalex.org/W2168359464","https://openalex.org/W2188233853","https://openalex.org/W2215775476","https://openalex.org/W2224222633","https://openalex.org/W2257979135","https://openalex.org/W2350695713","https://openalex.org/W2383406194","https://openalex.org/W2410842990","https://openalex.org/W2416133397","https://openalex.org/W2574075983","https://openalex.org/W2626804490","https://openalex.org/W2736629007","https://openalex.org/W2738669288","https://openalex.org/W2738675347","https://openalex.org/W2759471388","https://openalex.org/W2761873684","https://openalex.org/W2768908787","https://openalex.org/W2770150859","https://openalex.org/W2772709170","https://openalex.org/W2792012198","https://openalex.org/W2888826999","https://openalex.org/W2896930824","https://openalex.org/W2900559324","https://openalex.org/W2901707424","https://openalex.org/W2902125520","https://openalex.org/W2913758949","https://openalex.org/W2916350282","https://openalex.org/W2917742641","https://openalex.org/W2917770073","https://openalex.org/W2920362155","https://openalex.org/W2948625193","https://openalex.org/W2949800005","https://openalex.org/W2951273977","https://openalex.org/W2955240493","https://openalex.org/W2962799618","https://openalex.org/W2963289505","https://openalex.org/W2963569233","https://openalex.org/W2963646405","https://openalex.org/W2963943581","https://openalex.org/W2963960193","https://openalex.org/W2964043796","https://openalex.org/W2964263543","https://openalex.org/W2964281483","https://openalex.org/W2989847975","https://openalex.org/W2995356893","https://openalex.org/W3002093512","https://openalex.org/W3022566517","https://openalex.org/W3034558260","https://openalex.org/W3035644784","https://openalex.org/W3082042211","https://openalex.org/W3094020431","https://openalex.org/W3101172017","https://openalex.org/W3101852789","https://openalex.org/W3103451896","https://openalex.org/W3104160712","https://openalex.org/W3105871743","https://openalex.org/W3115918552","https://openalex.org/W3118210634","https://openalex.org/W3131546278","https://openalex.org/W3151924600","https://openalex.org/W4256282042","https://openalex.org/W6621199667","https://openalex.org/W6633953550","https://openalex.org/W6634711830","https://openalex.org/W6732559233","https://openalex.org/W6743368274","https://openalex.org/W6758622208","https://openalex.org/W6758864513","https://openalex.org/W6765387057","https://openalex.org/W6791113702","https://openalex.org/W6902784969"],"related_works":["https://openalex.org/W1581742186","https://openalex.org/W2897673281","https://openalex.org/W2166494941","https://openalex.org/W3034558260","https://openalex.org/W2901707424","https://openalex.org/W2902143777","https://openalex.org/W3103306441","https://openalex.org/W3157893055","https://openalex.org/W3034769194","https://openalex.org/W3003129838","https://openalex.org/W2768908787","https://openalex.org/W3115631912","https://openalex.org/W3106341469","https://openalex.org/W1607218107","https://openalex.org/W2064306130","https://openalex.org/W2949969799","https://openalex.org/W2950172727","https://openalex.org/W2998135952","https://openalex.org/W1453801241","https://openalex.org/W1130790960"],"abstract_inverted_index":null,"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
