{"id":"https://openalex.org/W2963842088","doi":"https://doi.org/10.1145/3292500.3330668","title":"Reinforcement Learning to Optimize Long-term User Engagement in Recommender Systems","display_name":"Reinforcement Learning to Optimize Long-term User Engagement in Recommender Systems","publication_year":2019,"publication_date":"2019-07-25","ids":{"openalex":"https://openalex.org/W2963842088","doi":"https://doi.org/10.1145/3292500.3330668","mag":"2963842088"},"language":"en","primary_location":{"id":"doi:10.1145/3292500.3330668","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292500.3330668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089307887","display_name":"Lixin Zou","orcid":"https://orcid.org/0000-0001-6755-871X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lixin Zou","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103251082","display_name":"Long Xia","orcid":"https://orcid.org/0000-0003-2580-6206"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Xia","raw_affiliation_strings":["JD.com, Beijing, China"],"affiliations":[{"raw_affiliation_string":"JD.com, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008043408","display_name":"Zhuoye Ding","orcid":"https://orcid.org/0000-0001-7430-5980"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuoye Ding","raw_affiliation_strings":["JD.com, Beijing, China"],"affiliations":[{"raw_affiliation_string":"JD.com, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101656872","display_name":"Jiaxing Song","orcid":"https://orcid.org/0000-0002-7020-8435"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxing Song","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100687147","display_name":"Weidong Liu","orcid":"https://orcid.org/0000-0002-2276-2159"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weidong Liu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054482111","display_name":"Dawei Yin","orcid":"https://orcid.org/0000-0002-8846-2001"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Yin","raw_affiliation_strings":["JD.com, Beijing, China"],"affiliations":[{"raw_affiliation_string":"JD.com, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5089307887"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":35.9402,"has_fulltext":false,"cited_by_count":236,"citation_normalized_percentile":{"value":0.99720758,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2810","last_page":"2818"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8741925954818726},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.8583908677101135},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8379135131835938},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.5938259363174438},{"id":"https://openalex.org/keywords/user-engagement","display_name":"User engagement","score":0.5251799821853638},{"id":"https://openalex.org/keywords/user-modeling","display_name":"User modeling","score":0.5064214468002319},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5014266967773438},{"id":"https://openalex.org/keywords/bootstrapping","display_name":"Bootstrapping (finance)","score":0.4902942478656769},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.46075257658958435},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4232298731803894},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3848460018634796},{"id":"https://openalex.org/keywords/user-interface","display_name":"User interface","score":0.20214709639549255},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.18580719828605652}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8741925954818726},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.8583908677101135},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8379135131835938},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.5938259363174438},{"id":"https://openalex.org/C2984870255","wikidata":"https://www.wikidata.org/wiki/Q5196451","display_name":"User engagement","level":2,"score":0.5251799821853638},{"id":"https://openalex.org/C67712803","wikidata":"https://www.wikidata.org/wiki/Q7901853","display_name":"User modeling","level":3,"score":0.5064214468002319},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5014266967773438},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.4902942478656769},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.46075257658958435},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4232298731803894},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3848460018634796},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.20214709639549255},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.18580719828605652},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C106159729","wikidata":"https://www.wikidata.org/wiki/Q2294553","display_name":"Financial economics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3292500.3330668","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292500.3330668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1757796397","https://openalex.org/W2090955639","https://openalex.org/W2112420033","https://openalex.org/W2112533714","https://openalex.org/W2117911558","https://openalex.org/W2137245235","https://openalex.org/W2138108551","https://openalex.org/W2153578526","https://openalex.org/W2166237624","https://openalex.org/W2188353343","https://openalex.org/W2215378786","https://openalex.org/W2225560834","https://openalex.org/W2262817822","https://openalex.org/W2295739661","https://openalex.org/W2312609093","https://openalex.org/W2358698356","https://openalex.org/W2400213106","https://openalex.org/W2409498980","https://openalex.org/W2494566063","https://openalex.org/W2604639157","https://openalex.org/W2604822632","https://openalex.org/W2605350416","https://openalex.org/W2739805805","https://openalex.org/W2739916191","https://openalex.org/W2767807341","https://openalex.org/W2782696945","https://openalex.org/W2783118243","https://openalex.org/W2784068709","https://openalex.org/W2788295351","https://openalex.org/W2799544270","https://openalex.org/W2941385591","https://openalex.org/W2950577311","https://openalex.org/W2952613481","https://openalex.org/W2953337808","https://openalex.org/W2962785510","https://openalex.org/W2964044287","https://openalex.org/W3099420497","https://openalex.org/W3102778384","https://openalex.org/W3102899483","https://openalex.org/W4211072278"],"related_works":["https://openalex.org/W1534274833","https://openalex.org/W3117246195","https://openalex.org/W156620619","https://openalex.org/W2098233217","https://openalex.org/W2537376277","https://openalex.org/W3172701938","https://openalex.org/W4297577197","https://openalex.org/W3128744564","https://openalex.org/W4396233422","https://openalex.org/W2766485692"],"abstract_inverted_index":{"Recommender":[0],"systems":[1],"play":[2],"a":[3,42,44,75,162,216],"crucial":[4],"role":[5],"in":[6,17,37,137,157,181,207],"our":[7],"daily":[8],"lives.":[9],"Feed":[10],"streaming":[11,28],"mechanism":[12],"has":[13],"been":[14],"widely":[15],"used":[16],"the":[18,23,32,79,96,100,169,196,199,203,226],"recommender":[19,46],"system,":[20],"especially":[21,146],"on":[22,212],"mobile":[24],"Apps.":[25],"The":[26],"feed":[27],"setting":[29],"provides":[30],"users":[31],"interactive":[33],"manner":[34],"of":[35,98,124,186,205],"recommendation":[36],"never-ending":[38],"feeds.":[39],"In":[40],"such":[41],"manner,":[43],"good":[45],"system":[47],"should":[48],"pay":[49],"more":[50],"attention":[51],"to":[52,106,119,167],"user":[53,67,72,109,115,171,189,228],"stickiness,":[54],"which":[55,121,179,194],"is":[56,74,82,111,143],"far":[57],"beyond":[58],"classical":[59],"instant":[60,126],"metrics":[61],"and":[62,130,150,191,201,215,230],"typically":[63,122],"measured":[64],"by":[65],"long-term":[66,71,108,170,227],"engagement.":[68,172],"Directly":[69],"optimizing":[70],"engagement":[73,110,229],"non-trivial":[76],"problem,":[77],"as":[78],"learning":[80,89,142],"target":[81],"usually":[83],"not":[84],"available":[85],"for":[86],"conventional":[87],"supervised":[88],"methods.":[90],"Though":[91],"reinforcement":[92],"learning~(RL)":[93],"naturally":[94],"fits":[95],"problem":[97],"maximizing":[99],"long":[101],"term":[102],"rewards,":[103],"applying":[104],"RL":[105,163],"optimize":[107,168],"still":[112,144],"facing":[113],"challenges:":[114],"behaviors":[116],"are":[117],"versatile":[118],"model,":[120],"consists":[123],"both":[125],"feedback":[127,132],"(eg.":[128,133],"clicks)":[129],"delayed":[131],"dwell":[134],"time,":[135],"revisit);":[136],"addition,":[138],"performing":[139],"effective":[140],"off-policy":[141],"immature,":[145],"when":[147],"combining":[148],"bootstrapping":[149],"function":[151],"approximation.":[152],"To":[153],"address":[154],"these":[155],"issues,":[156],"this":[158],"work,":[159],"we":[160],"introduce":[161],"framework":[164],"---":[165],"FeedRec":[166,173,223],"includes":[174],"two":[175],"components:":[176],"1)~a":[177],"Q-Network":[178,200],"designed":[180],"hierarchical":[182],"LSTM":[183],"takes":[184],"charge":[185],"modeling":[187],"complex":[188],"behaviors,":[190],"2)~a":[192],"S-Network,":[193],"simulates":[195],"environment,":[197],"assists":[198],"voids":[202],"instability":[204],"convergence":[206],"policy":[208],"learning.":[209],"Extensive":[210],"experiments":[211],"synthetic":[213],"data":[214,220],"real-world":[217],"large":[218],"scale":[219],"show":[221],"that":[222],"effectively":[224],"optimizes":[225],"outperforms":[231],"state-of-the-arts.":[232]},"counts_by_year":[{"year":2026,"cited_by_count":10},{"year":2025,"cited_by_count":23},{"year":2024,"cited_by_count":46},{"year":2023,"cited_by_count":55},{"year":2022,"cited_by_count":39},{"year":2021,"cited_by_count":43},{"year":2020,"cited_by_count":17},{"year":2019,"cited_by_count":3}],"updated_date":"2026-04-24T08:23:43.765630","created_date":"2025-10-10T00:00:00"}
