{"id":"https://openalex.org/W3123348991","doi":"https://doi.org/10.1145/3543846","title":"Reinforcement Learning based Recommender Systems: A Survey","display_name":"Reinforcement Learning based Recommender Systems: A Survey","publication_year":2022,"publication_date":"2022-06-15","ids":{"openalex":"https://openalex.org/W3123348991","doi":"https://doi.org/10.1145/3543846","mag":"3123348991"},"language":"en","primary_location":{"id":"doi:10.1145/3543846","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3543846","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"type":"review","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076309286","display_name":"M. Mehdi Afsar","orcid":"https://orcid.org/0000-0002-6108-5250"},"institutions":[{"id":"https://openalex.org/I168635309","display_name":"University of Calgary","ror":"https://ror.org/03yjb2x39","country_code":"CA","type":"education","lineage":["https://openalex.org/I168635309"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"M. Mehdi Afsar","raw_affiliation_strings":["University of Calgary, Calgary, AB, Canada"],"affiliations":[{"raw_affiliation_string":"University of Calgary, Calgary, AB, Canada","institution_ids":["https://openalex.org/I168635309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042659974","display_name":"Trafford Crump","orcid":"https://orcid.org/0000-0001-6690-1926"},"institutions":[{"id":"https://openalex.org/I168635309","display_name":"University of Calgary","ror":"https://ror.org/03yjb2x39","country_code":"CA","type":"education","lineage":["https://openalex.org/I168635309"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Trafford Crump","raw_affiliation_strings":["University of Calgary, Calgary, AB, Canada"],"affiliations":[{"raw_affiliation_string":"University of Calgary, Calgary, AB, Canada","institution_ids":["https://openalex.org/I168635309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008779348","display_name":"Behrouz H. Far","orcid":"https://orcid.org/0000-0003-1589-8039"},"institutions":[{"id":"https://openalex.org/I168635309","display_name":"University of Calgary","ror":"https://ror.org/03yjb2x39","country_code":"CA","type":"education","lineage":["https://openalex.org/I168635309"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Behrouz Far","raw_affiliation_strings":["University of Calgary, Calgary, AB, Canada"],"affiliations":[{"raw_affiliation_string":"University of Calgary, Calgary, AB, Canada","institution_ids":["https://openalex.org/I168635309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5076309286"],"corresponding_institution_ids":["https://openalex.org/I168635309"],"apc_list":null,"apc_paid":null,"fwci":130.3992,"has_fulltext":false,"cited_by_count":449,"citation_normalized_percentile":{"value":0.99989617,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"55","issue":"7","first_page":"1","last_page":"38"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.8876405358314514},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8838287591934204},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8801918625831604},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6983479261398315},{"id":"https://openalex.org/keywords/rss","display_name":"RSS","score":0.5953751802444458},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5677486658096313},{"id":"https://openalex.org/keywords/collaborative-filtering","display_name":"Collaborative filtering","score":0.5586691498756409},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5492451786994934},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5437155961990356},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4674438238143921},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.31889185309410095},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1871107518672943},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.08494022488594055}],"concepts":[{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.8876405358314514},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8838287591934204},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8801918625831604},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6983479261398315},{"id":"https://openalex.org/C2385561","wikidata":"https://www.wikidata.org/wiki/Q45432","display_name":"RSS","level":2,"score":0.5953751802444458},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5677486658096313},{"id":"https://openalex.org/C21569690","wikidata":"https://www.wikidata.org/wiki/Q94702","display_name":"Collaborative filtering","level":3,"score":0.5586691498756409},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5492451786994934},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5437155961990356},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4674438238143921},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.31889185309410095},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1871107518672943},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.08494022488594055},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3543846","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3543846","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":271,"referenced_works":["https://openalex.org/W12291563","https://openalex.org/W21938781","https://openalex.org/W51508254","https://openalex.org/W91593682","https://openalex.org/W124618424","https://openalex.org/W281665770","https://openalex.org/W1015675232","https://openalex.org/W1412447802","https://openalex.org/W1486317198","https://openalex.org/W1493395914","https://openalex.org/W1515851193","https://openalex.org/W1526077108","https://openalex.org/W1530276735","https://openalex.org/W1556219185","https://openalex.org/W1572047901","https://openalex.org/W1592847719","https://openalex.org/W1622701847","https://openalex.org/W1757796397","https://openalex.org/W1924770834","https://openalex.org/W1966553486","https://openalex.org/W1968563228","https://openalex.org/W1975767384","https://openalex.org/W1977655452","https://openalex.org/W1995835534","https://openalex.org/W1997136459","https://openalex.org/W2008886893","https://openalex.org/W2009979684","https://openalex.org/W2012516036","https://openalex.org/W2017957151","https://openalex.org/W2020008338","https://openalex.org/W2024320089","https://openalex.org/W2025605741","https://openalex.org/W2037913744","https://openalex.org/W2042602549","https://openalex.org/W2044758663","https://openalex.org/W2046788142","https://openalex.org/W2049655669","https://openalex.org/W2056089161","https://openalex.org/W2068441181","https://openalex.org/W2078021145","https://openalex.org/W2080950999","https://openalex.org/W2086206379","https://openalex.org/W2091565802","https://openalex.org/W2093848332","https://openalex.org/W2097082857","https://openalex.org/W2099471712","https://openalex.org/W2099780436","https://openalex.org/W2100235918","https://openalex.org/W2101409192","https://openalex.org/W2102166862","https://openalex.org/W2105883975","https://openalex.org/W2107478203","https://openalex.org/W2109910161","https://openalex.org/W2112420033","https://openalex.org/W2112794046","https://openalex.org/W2113510994","https://openalex.org/W2116206254","https://openalex.org/W2116354394","https://openalex.org/W2117012949","https://openalex.org/W2117911558","https://openalex.org/W2119717200","https://openalex.org/W2120346334","https://openalex.org/W2126026531","https://openalex.org/W2126316555","https://openalex.org/W2135263912","https://openalex.org/W2137063737","https://openalex.org/W2138108551","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2146364776","https://openalex.org/W2146682077","https://openalex.org/W2149008295","https://openalex.org/W2151161180","https://openalex.org/W2153579005","https://openalex.org/W2155968351","https://openalex.org/W2156759379","https://openalex.org/W2156909104","https://openalex.org/W2157973827","https://openalex.org/W2159334452","https://openalex.org/W2163068732","https://openalex.org/W2165150801","https://openalex.org/W2168405694","https://openalex.org/W2171446571","https://openalex.org/W2173248099","https://openalex.org/W2173564293","https://openalex.org/W2189395077","https://openalex.org/W2194775991","https://openalex.org/W2201581102","https://openalex.org/W2215378786","https://openalex.org/W2219888463","https://openalex.org/W2257979135","https://openalex.org/W2273088453","https://openalex.org/W2273621210","https://openalex.org/W2275802500","https://openalex.org/W2287880243","https://openalex.org/W2335959470","https://openalex.org/W2341865734","https://openalex.org/W2395575420","https://openalex.org/W2396881363","https://openalex.org/W2396961959","https://openalex.org/W2512370135","https://openalex.org/W2519091744","https://openalex.org/W2535327675","https://openalex.org/W2544422362","https://openalex.org/W2557201733","https://openalex.org/W2583993537","https://openalex.org/W2584643785","https://openalex.org/W2596367596","https://openalex.org/W2604382266","https://openalex.org/W2608990489","https://openalex.org/W2612493630","https://openalex.org/W2623431351","https://openalex.org/W2731083990","https://openalex.org/W2736057540","https://openalex.org/W2736601468","https://openalex.org/W2746553466","https://openalex.org/W2747956544","https://openalex.org/W2759912774","https://openalex.org/W2766527293","https://openalex.org/W2768978543","https://openalex.org/W2772217324","https://openalex.org/W2781763969","https://openalex.org/W2783573456","https://openalex.org/W2785994986","https://openalex.org/W2787933113","https://openalex.org/W2788295351","https://openalex.org/W2788728386","https://openalex.org/W2799117832","https://openalex.org/W2799544270","https://openalex.org/W2803281228","https://openalex.org/W2805720864","https://openalex.org/W2809162153","https://openalex.org/W2886209086","https://openalex.org/W2886601525","https://openalex.org/W2897405591","https://openalex.org/W2898273872","https://openalex.org/W2898621204","https://openalex.org/W2902572901","https://openalex.org/W2904730732","https://openalex.org/W2909906617","https://openalex.org/W2911104099","https://openalex.org/W2925845208","https://openalex.org/W2936427778","https://openalex.org/W2941385591","https://openalex.org/W2945488061","https://openalex.org/W2946996695","https://openalex.org/W2949382160","https://openalex.org/W2952152174","https://openalex.org/W2952523895","https://openalex.org/W2952955766","https://openalex.org/W2955026632","https://openalex.org/W2955774983","https://openalex.org/W2962790223","https://openalex.org/W2962818688","https://openalex.org/W2963068985","https://openalex.org/W2963262099","https://openalex.org/W2963277051","https://openalex.org/W2963561234","https://openalex.org/W2963619374","https://openalex.org/W2963654596","https://openalex.org/W2963842088","https://openalex.org/W2963864421","https://openalex.org/W2964112275","https://openalex.org/W2964157711","https://openalex.org/W2965185552","https://openalex.org/W2965512832","https://openalex.org/W2969920248","https://openalex.org/W2972561734","https://openalex.org/W2972905192","https://openalex.org/W2980520305","https://openalex.org/W2981211936","https://openalex.org/W2982120135","https://openalex.org/W2984869362","https://openalex.org/W2991046523","https://openalex.org/W2996569333","https://openalex.org/W2996736458","https://openalex.org/W2996959725","https://openalex.org/W2997556638","https://openalex.org/W2997561372","https://openalex.org/W2997662139","https://openalex.org/W2997914278","https://openalex.org/W2999560001","https://openalex.org/W3000289295","https://openalex.org/W3003931103","https://openalex.org/W3006656721","https://openalex.org/W3007094061","https://openalex.org/W3007830800","https://openalex.org/W3010690134","https://openalex.org/W3011120880","https://openalex.org/W3011809564","https://openalex.org/W3012881846","https://openalex.org/W3020831971","https://openalex.org/W3021608756","https://openalex.org/W3029587262","https://openalex.org/W3032675404","https://openalex.org/W3034099687","https://openalex.org/W3034833075","https://openalex.org/W3034853385","https://openalex.org/W3035084859","https://openalex.org/W3035098003","https://openalex.org/W3035166059","https://openalex.org/W3035170973","https://openalex.org/W3040094401","https://openalex.org/W3043995487","https://openalex.org/W3045525389","https://openalex.org/W3047443212","https://openalex.org/W3049342604","https://openalex.org/W3087898974","https://openalex.org/W3088936686","https://openalex.org/W3091129453","https://openalex.org/W3099726771","https://openalex.org/W3099865390","https://openalex.org/W3100944043","https://openalex.org/W3101366597","https://openalex.org/W3102899483","https://openalex.org/W3103141630","https://openalex.org/W3103559770","https://openalex.org/W3104966867","https://openalex.org/W3106445281","https://openalex.org/W3116249021","https://openalex.org/W3116873649","https://openalex.org/W3121933628","https://openalex.org/W3122507327","https://openalex.org/W3123956618","https://openalex.org/W3124675547","https://openalex.org/W3132328326","https://openalex.org/W3138819813","https://openalex.org/W3139377883","https://openalex.org/W3146823280","https://openalex.org/W3152509363","https://openalex.org/W3159868125","https://openalex.org/W3164213197","https://openalex.org/W3165844356","https://openalex.org/W3169408585","https://openalex.org/W3169528684","https://openalex.org/W3185693672","https://openalex.org/W3194194466","https://openalex.org/W3206324923","https://openalex.org/W4214717370","https://openalex.org/W4230030715","https://openalex.org/W4230472272","https://openalex.org/W4248672808","https://openalex.org/W4254547512","https://openalex.org/W4288110508","https://openalex.org/W4288302490","https://openalex.org/W4288346394","https://openalex.org/W4288372913","https://openalex.org/W4289148089","https://openalex.org/W4289709987","https://openalex.org/W4294567340","https://openalex.org/W4294643831","https://openalex.org/W4294990489","https://openalex.org/W4298857966","https://openalex.org/W4299802797","https://openalex.org/W4300006470","https://openalex.org/W4300799055","https://openalex.org/W4301178047","https://openalex.org/W4394672593","https://openalex.org/W6637967152","https://openalex.org/W6687681856","https://openalex.org/W6712451147","https://openalex.org/W6718092244","https://openalex.org/W6732665253","https://openalex.org/W6740801417","https://openalex.org/W6741002519","https://openalex.org/W6746030470","https://openalex.org/W6761637595","https://openalex.org/W6780559895","https://openalex.org/W6795843004","https://openalex.org/W6802356573","https://openalex.org/W7034144617"],"related_works":["https://openalex.org/W1484355083","https://openalex.org/W4220714703","https://openalex.org/W2170391450","https://openalex.org/W2735929803","https://openalex.org/W2098758514","https://openalex.org/W3008845055","https://openalex.org/W2041004656","https://openalex.org/W2202724490","https://openalex.org/W2119611366","https://openalex.org/W2103058005"],"abstract_inverted_index":{"Recommender":[0],"systems":[1,191],"(RSs)":[2],"have":[3],"become":[4],"an":[5,200,240],"inseparable":[6],"part":[7],"of":[8,116,139,142,158,215,218],"our":[9,16,21,27],"everyday":[10],"lives.":[11],"They":[12],"help":[13],"us":[14],"find":[15],"favorite":[17,28],"items":[18],"to":[19,30,38,97,105,168,171,198,206],"purchase,":[20],"friends":[22],"on":[23,186,202],"social":[24],"networks,":[25],"and":[26,76,91,104,124,178,205,224,234,253,256,265,272,279],"movies":[29],"watch.":[31],"Traditionally,":[32],"the":[33,62,99,109,114,153,156,172,203,208,219,286],"recommendation":[34,86,120,173],"problem":[35,58,174],"was":[36,133],"considered":[37],"be":[39,68,77,229,283],"a":[40,55,71,147,184,211],"classification":[41],"or":[42],"prediction":[43],"problem,":[44],"but":[45],"it":[46,53,66,132,166],"is":[47,95,121,193,197],"now":[48],"widely":[49],"agreed":[50],"that":[51,226,281],"formulating":[52],"as":[54,70],"sequential":[56],"decision":[57,73],"can":[59,67,228,282],"better":[60],"reflect":[61],"user-system":[63,102],"interaction.":[64],"Therefore,":[65],"formulated":[69],"Markov":[72],"process":[74],"(MDP)":[75],"solved":[78],"by":[79],"reinforcement":[80,160,187],"learning":[81,161,188],"(RL)":[82],"algorithms.":[83,145],"Unlike":[84],"traditional":[85,143],"methods,":[87],"including":[88],"collaborative":[89],"filtering":[90],"content-based":[92],"filtering,":[93],"RL":[94,118,144,170],"able":[96],"handle":[98],"sequential,":[100],"dynamic":[101],"interaction":[103],"take":[106],"into":[107,232],"account":[108],"long-term":[110],"user":[111],"engagement.":[112],"Although":[113],"idea":[115],"using":[117,269],"for":[119,128],"not":[122,134],"new":[123,148],"has":[125,150],"been":[126],"around":[127],"about":[129],"two":[130],"decades,":[131],"very":[135],"practical,":[136],"mainly":[137],"because":[138],"scalability":[140],"problems":[141],"However,":[146],"trend":[149],"emerged":[151],"in":[152,285],"field":[154,204],"since":[155],"introduction":[157],"deep":[159],"(DRL)":[162],",":[163],"which":[164],"made":[165],"possible":[167],"apply":[169],"with":[175,210,243],"large":[176],"state":[177,247],"action":[179],"spaces.":[180],"In":[181],"this":[182],"paper,":[183],"survey":[185,257],"based":[189],"recommender":[190],"(RLRSs)":[192],"presented.":[194],"Our":[195],"aim":[196],"present":[199],"outlook":[201],"provide":[207],"reader":[209],"fairly":[212],"complete":[213],"knowledge":[214],"key":[216],"concepts":[217],"field.":[220],"We":[221,261],"first":[222],"recognize":[223],"illustrate":[225],"RLRSs":[227],"generally":[230],"classified":[231],"RL-":[233],"DRL-based":[235],"methods.":[236],"Then,":[237],"we":[238,275],"propose":[239],"RLRS":[241,258],"framework":[242],"four":[244],"components,":[245],"i.e.,":[246],"representation,":[248],"policy":[249],"optimization,":[250],"reward":[251],"formulation,":[252],"environment":[254],"building,":[255],"algorithms":[259],"accordingly.":[260],"highlight":[262],"emerging":[263],"topics":[264],"depict":[266],"important":[267,277],"trends":[268],"various":[270],"graphs":[271],"tables.":[273],"Finally,":[274],"discuss":[276],"aspects":[278],"challenges":[280],"addressed":[284],"future.":[287]},"counts_by_year":[{"year":2026,"cited_by_count":22},{"year":2025,"cited_by_count":141},{"year":2024,"cited_by_count":149},{"year":2023,"cited_by_count":92},{"year":2022,"cited_by_count":27},{"year":2021,"cited_by_count":18}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
