{"id":"https://openalex.org/W4385245587","doi":"https://doi.org/10.1145/3600211.3604669","title":"User Tampering in Reinforcement Learning Recommender Systems","display_name":"User Tampering in Reinforcement Learning Recommender Systems","publication_year":2023,"publication_date":"2023-08-08","ids":{"openalex":"https://openalex.org/W4385245587","doi":"https://doi.org/10.1145/3600211.3604669"},"language":"en","primary_location":{"id":"doi:10.1145/3600211.3604669","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3600211.3604669","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 AAAI/ACM Conference on AI, Ethics, and Society","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3600211.3604669","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047979305","display_name":"Atoosa Kasirzadeh","orcid":"https://orcid.org/0000-0002-5967-3782"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]},{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["AU","GB"],"is_corresponding":false,"raw_author_name":"Atoosa Kasirzadeh","raw_affiliation_strings":["University of Edinburgh, United Kingdom and Australian National University, Australia"],"raw_orcid":"https://orcid.org/0000-0002-5967-3782","affiliations":[{"raw_affiliation_string":"University of Edinburgh, United Kingdom and Australian National University, Australia","institution_ids":["https://openalex.org/I118347636","https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006964403","display_name":"Charles L. Evans","orcid":"https://orcid.org/0000-0001-5588-9052"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Charles Evans","raw_affiliation_strings":["Australian National University, Australia"],"raw_orcid":"https://orcid.org/0000-0001-5588-9052","affiliations":[{"raw_affiliation_string":"Australian National University, Australia","institution_ids":["https://openalex.org/I118347636"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.6729,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.95056338,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"58","last_page":"69"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.8138286471366882},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8078807592391968},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7964233160018921},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.45645850896835327},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33289340138435364},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3225947618484497},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.32228100299835205}],"concepts":[{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.8138286471366882},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8078807592391968},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7964233160018921},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.45645850896835327},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33289340138435364},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3225947618484497},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.32228100299835205}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3600211.3604669","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3600211.3604669","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 AAAI/ACM Conference on AI, Ethics, and Society","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:publications/b9e86d1e-d934-4662-a75f-ac798d26cd60","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2109.04083","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"doi:10.1145/3600211.3604669","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3600211.3604669","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 AAAI/ACM Conference on AI, Ethics, and Society","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Partnerships for the goals","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/17"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320317153","display_name":"DeepMind","ror":"https://ror.org/00971b260"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2024320089","https://openalex.org/W2025605741","https://openalex.org/W2029604041","https://openalex.org/W2030481925","https://openalex.org/W2112420033","https://openalex.org/W2135263912","https://openalex.org/W2138108551","https://openalex.org/W2153111836","https://openalex.org/W2358698356","https://openalex.org/W2787933113","https://openalex.org/W2788295351","https://openalex.org/W2791382412","https://openalex.org/W2799544270","https://openalex.org/W2889127731","https://openalex.org/W2917760808","https://openalex.org/W2973004458","https://openalex.org/W2980951591","https://openalex.org/W3008301325","https://openalex.org/W3034558260","https://openalex.org/W3099814932","https://openalex.org/W3102778384","https://openalex.org/W3102899483","https://openalex.org/W3103006639","https://openalex.org/W3106000504","https://openalex.org/W3123348991","https://openalex.org/W3175333484","https://openalex.org/W3199782056","https://openalex.org/W4283790836","https://openalex.org/W4386713598"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W4390273403","https://openalex.org/W4386781444","https://openalex.org/W2150182025","https://openalex.org/W3092950680","https://openalex.org/W3197542405","https://openalex.org/W2056712470","https://openalex.org/W3125580266","https://openalex.org/W4288390103","https://openalex.org/W4317039510"],"abstract_inverted_index":{"In":[0],"this":[1,176],"paper,":[2],"we":[3,80,92,208],"introduce":[4],"new":[5],"formal":[6,59],"methods":[7,84,105],"and":[8,79,101,190],"provide":[9],"empirical":[10],"evidence":[11],"to":[12,52,64,151,155,165],"highlight":[13],"a":[14,31,40,50,127,146,198],"unique":[15],"safety":[16,195],"concern":[17],"prevalent":[18],"in":[19,70,108,163,201,211],"reinforcement":[20],"learning":[21],"(RL)-based":[22],"recommendation":[23,77,133,188],"algorithms":[24],"\u2013":[25],"\u2019user":[26],"tampering.\u2019":[27],"User":[28],"tampering":[29,99,115],"is":[30],"situation":[32],"where":[33],"an":[34,131],"RL-based":[35,76,132,187],"recommender":[36],"system":[37,134],"may":[38],"manipulate":[39],"media":[41],"user\u2019s":[42],"opinions":[43],"through":[44],"its":[45,153,160],"suggestions":[46],"as":[47],"part":[48],"of":[49,113,119,130,139],"policy":[51],"maximize":[53],"long-term":[54],"user":[55,89,114],"engagement.":[56],"We":[57,121],"use":[58],"techniques":[60],"from":[61,205],"causal":[62],"modeling":[63],"critically":[65],"analyze":[66],"prevailing":[67],"solutions":[68],"proposed":[69],"the":[71,110,117,137,182,202,206,212],"literature":[72],"for":[73,97,184],"implementing":[74],"scalable":[75],"systems,":[78],"observe":[81],"that":[82,103,145,173,192],"these":[83,104],"do":[85],"not":[86],"adequately":[87],"prevent":[88],"tampering.":[90],"Moreover,":[91],"evaluate":[93],"existing":[94],"mitigation":[95],"strategies":[96],"reward":[98],"issues,":[100],"show":[102],"are":[106],"insufficient":[107],"addressing":[109],"distinct":[111],"phenomenon":[112],"within":[116],"context":[118],"recommendations.":[120],"further":[122],"reinforce":[123],"our":[124],"findings":[125,180],"with":[126,159,170,175],"simulation":[128],"study":[129,143],"focused":[135],"on":[136],"dissemination":[138],"political":[140],"content.":[141],"Our":[142,179],"shows":[144],"Q-learning":[147],"algorithm":[148],"consistently":[149],"learns":[150],"exploit":[152],"opportunities":[154],"polarize":[156],"simulated":[157],"users":[158],"early":[161],"recommendations":[162,172],"order":[164],"have":[166,209],"more":[167],"consistent":[168],"success":[169],"subsequent":[171],"align":[174],"induced":[177],"polarization.":[178],"emphasize":[181],"necessity":[183],"developing":[185],"safer":[186],"systems":[189],"suggest":[191],"achieving":[193],"such":[194],"would":[196],"require":[197],"fundamental":[199],"shift":[200],"design":[203],"away":[204],"approaches":[207],"seen":[210],"recent":[213],"literature.":[214]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":2}],"updated_date":"2026-06-24T13:16:06.693445","created_date":"2023-07-26T00:00:00"}