{"id":"https://openalex.org/W4396216288","doi":"https://doi.org/10.1145/3661996","title":"On the Opportunities and Challenges of Offline Reinforcement Learning for Recommender Systems","display_name":"On the Opportunities and Challenges of Offline Reinforcement Learning for Recommender Systems","publication_year":2024,"publication_date":"2024-04-29","ids":{"openalex":"https://openalex.org/W4396216288","doi":"https://doi.org/10.1145/3661996"},"language":"en","primary_location":{"id":"doi:10.1145/3661996","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3661996","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3661996","source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3661996","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057487706","display_name":"Xiaocong Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"government","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Xiaocong Chen","raw_affiliation_strings":["Data61, CSIRO, Eveleigh, Australia"],"affiliations":[{"raw_affiliation_string":"Data61, CSIRO, Eveleigh, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100358930","display_name":"Siyu Wang","orcid":"https://orcid.org/0009-0008-8726-5277"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Siyu Wang","raw_affiliation_strings":["UNSW Sydney, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"UNSW Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021827617","display_name":"Julian McAuley","orcid":"https://orcid.org/0000-0003-0955-7588"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Julian McAuley","raw_affiliation_strings":["UCSD, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"UCSD, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005545405","display_name":"Dietmar Jannach","orcid":"https://orcid.org/0000-0002-4698-8507"},"institutions":[{"id":"https://openalex.org/I4210166741","display_name":"University of Klagenfurt","ror":"https://ror.org/05q9m0937","country_code":"AT","type":"education","lineage":["https://openalex.org/I4210166741"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Dietmar Jannach","raw_affiliation_strings":["University of Klagenfurt, Klagenfurt, Austria"],"affiliations":[{"raw_affiliation_string":"University of Klagenfurt, Klagenfurt, Austria","institution_ids":["https://openalex.org/I4210166741"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052731721","display_name":"Lina Yao","orcid":"https://orcid.org/0000-0002-4149-839X"},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"government","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Lina Yao","raw_affiliation_strings":["Data61, CSIRO, Eveleigh, Australia and UNSW Sydney, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"Data61, CSIRO, Eveleigh, Australia and UNSW Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I31746571","https://openalex.org/I1292875679"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5057487706"],"corresponding_institution_ids":["https://openalex.org/I1292875679","https://openalex.org/I42894916"],"apc_list":null,"apc_paid":null,"fwci":5.3982,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.95746858,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"42","issue":"6","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8996798396110535},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.8289909362792969},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7601394653320312},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.6027904748916626},{"id":"https://openalex.org/keywords/online-and-offline","display_name":"Online and offline","score":0.5671426653862},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5440941452980042},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4699069857597351},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40195325016975403},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3580101728439331},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.30333203077316284},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11489495635032654}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8996798396110535},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.8289909362792969},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7601394653320312},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.6027904748916626},{"id":"https://openalex.org/C2780102126","wikidata":"https://www.wikidata.org/wiki/Q10928179","display_name":"Online and offline","level":2,"score":0.5671426653862},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5440941452980042},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4699069857597351},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40195325016975403},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3580101728439331},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30333203077316284},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11489495635032654},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3661996","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3661996","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3661996","source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3661996","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3661996","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3661996","source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4396216288.pdf"},"referenced_works_count":63,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W41554520","https://openalex.org/W2030808931","https://openalex.org/W2041710630","https://openalex.org/W2119717200","https://openalex.org/W2145339207","https://openalex.org/W2406454855","https://openalex.org/W2560674852","https://openalex.org/W2787933113","https://openalex.org/W2788728386","https://openalex.org/W2798435682","https://openalex.org/W2799544270","https://openalex.org/W2897405591","https://openalex.org/W2902572901","https://openalex.org/W2906762886","https://openalex.org/W2914272066","https://openalex.org/W2914584698","https://openalex.org/W2962886429","https://openalex.org/W2963654596","https://openalex.org/W2984100107","https://openalex.org/W2990130970","https://openalex.org/W3012881846","https://openalex.org/W3017289192","https://openalex.org/W3022972087","https://openalex.org/W3029587262","https://openalex.org/W3035051411","https://openalex.org/W3080116024","https://openalex.org/W3081226161","https://openalex.org/W3087898974","https://openalex.org/W3092103025","https://openalex.org/W3097300053","https://openalex.org/W3098551837","https://openalex.org/W3099464630","https://openalex.org/W3101366597","https://openalex.org/W3102631191","https://openalex.org/W3102899483","https://openalex.org/W3116073702","https://openalex.org/W3123348991","https://openalex.org/W3124675547","https://openalex.org/W3133706083","https://openalex.org/W3173984942","https://openalex.org/W3190132430","https://openalex.org/W3200267675","https://openalex.org/W3201286590","https://openalex.org/W4220806694","https://openalex.org/W4224308683","https://openalex.org/W4283789472","https://openalex.org/W4283796596","https://openalex.org/W4284712454","https://openalex.org/W4290858323","https://openalex.org/W4293653493","https://openalex.org/W4296604485","https://openalex.org/W4299828299","https://openalex.org/W4306317772","https://openalex.org/W4307302081","https://openalex.org/W4318159260","https://openalex.org/W4318823254","https://openalex.org/W4367319708","https://openalex.org/W4383993482","https://openalex.org/W4384408011","https://openalex.org/W4384891727","https://openalex.org/W4386729683","https://openalex.org/W4399280865"],"related_works":["https://openalex.org/W2264067234","https://openalex.org/W3124243301","https://openalex.org/W1571502335","https://openalex.org/W1589409554","https://openalex.org/W2759038785","https://openalex.org/W2172232600","https://openalex.org/W3123876860","https://openalex.org/W3124172198","https://openalex.org/W2046181650","https://openalex.org/W4225619808"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,78,85,114,130,143],"serves":[2],"as":[3],"a":[4,22,64,80,119],"potent":[5],"tool":[6],"for":[7,51],"modeling":[8],"dynamic":[9],"user":[10,55],"interests":[11],"within":[12,144],"recommender":[13,40,62,103,125,145],"systems,":[14,146],"garnering":[15],"increasing":[16],"research":[17,171],"attention":[18],"of":[19,37,70,111,151],"late.":[20],"However,":[21],"significant":[23],"drawback":[24],"persists:":[25],"its":[26,32],"poor":[27],"data":[28],"efficiency,":[29],"stemming":[30],"from":[31,91],"interactive":[33],"nature.":[34],"The":[35],"training":[36],"reinforcement":[38,60,77,84,113,129,142],"learning-based":[39,61],"systems":[41,63,104,126],"demands":[42],"expensive":[43],"online":[44,99],"interactions":[45],"to":[46,53,88,136,160,169],"amass":[47],"adequate":[48],"trajectories,":[49],"essential":[50],"agents":[52,87],"learn":[54],"preferences.":[56],"This":[57,133],"inefficiency":[58],"renders":[59],"formidable":[65],"undertaking,":[66],"necessitating":[67],"the":[68,109],"exploration":[69],"potential":[71],"solutions.":[72],"Recent":[73],"strides":[74],"in":[75,98,154,172],"offline":[76,92,107,112,128,141],"present":[79],"new":[81],"perspective.":[82],"Offline":[83],"empowers":[86],"glean":[89],"insights":[90],"datasets":[93],"and":[94,138,165],"deploy":[95],"learned":[96],"policies":[97],"settings.":[100],"Given":[101],"that":[102],"possess":[105],"extensive":[106],"datasets,":[108],"framework":[110],"aligns":[115],"seamlessly.":[116],"Despite":[117],"being":[118],"burgeoning":[120],"field,":[121],"works":[122],"centered":[123],"on":[124],"utilizing":[127],"remain":[131],"limited.":[132],"survey":[134],"aims":[135],"introduce":[137],"delve":[139],"into":[140],"offering":[147],"an":[148],"inclusive":[149],"review":[150],"existing":[152],"literature":[153],"this":[155,173],"domain.":[156],"Furthermore,":[157],"we":[158],"strive":[159],"underscore":[161],"prevalent":[162],"challenges,":[163],"opportunities,":[164],"future":[166],"pathways,":[167],"poised":[168],"propel":[170],"evolving":[174],"field.":[175]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
