{"id":"https://openalex.org/W4384659454","doi":"https://doi.org/10.1145/3539618.3592022","title":"Model-free Reinforcement Learning with Stochastic Reward Stabilization for Recommender Systems","display_name":"Model-free Reinforcement Learning with Stochastic Reward Stabilization for Recommender Systems","publication_year":2023,"publication_date":"2023-07-18","ids":{"openalex":"https://openalex.org/W4384659454","doi":"https://doi.org/10.1145/3539618.3592022"},"language":"en","primary_location":{"id":"doi:10.1145/3539618.3592022","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539618.3592022","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2308.13246","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029097679","display_name":"Tianchi Cai","orcid":"https://orcid.org/0000-0003-1503-6519"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tianchi Cai","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052770881","display_name":"Shenliao Bao","orcid":"https://orcid.org/0009-0001-1439-3170"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shenliao Bao","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072368959","display_name":"Jiyan Jiang","orcid":"https://orcid.org/0000-0002-1083-2834"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiyan Jiang","raw_affiliation_strings":["Ant Group, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086273618","display_name":"Shiji Zhou","orcid":"https://orcid.org/0000-0002-0666-0769"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shiji Zhou","raw_affiliation_strings":["Ant Group, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078799714","display_name":"Wenpeng Zhang","orcid":"https://orcid.org/0000-0002-3796-161X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenpeng Zhang","raw_affiliation_strings":["Ant Group, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058735109","display_name":"Lihong Gu","orcid":"https://orcid.org/0000-0002-0706-3448"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lihong Gu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053242349","display_name":"Jinjie Gu","orcid":"https://orcid.org/0000-0001-7596-4945"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jinjie Gu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031338085","display_name":"Guannan Zhang","orcid":"https://orcid.org/0000-0002-7091-2318"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guannan Zhang","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5029097679"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.10783533,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2179","last_page":"2183"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.9043356776237488},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8320668935775757},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7013537883758545},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.48081719875335693},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.4402593970298767},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4389982223510742},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41860756278038025}],"concepts":[{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.9043356776237488},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8320668935775757},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7013537883758545},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.48081719875335693},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.4402593970298767},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4389982223510742},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41860756278038025},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3539618.3592022","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539618.3592022","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2308.13246","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.13246","pdf_url":"https://arxiv.org/pdf/2308.13246","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2308.13246","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.13246","pdf_url":"https://arxiv.org/pdf/2308.13246","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4384659454.pdf","grobid_xml":"https://content.openalex.org/works/W4384659454.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W2054141820","https://openalex.org/W2074694452","https://openalex.org/W2094286023","https://openalex.org/W2136189984","https://openalex.org/W2475334473","https://openalex.org/W2512971201","https://openalex.org/W2723293840","https://openalex.org/W2746011824","https://openalex.org/W2787933113","https://openalex.org/W2799544270","https://openalex.org/W2902572901","https://openalex.org/W2963433607","https://openalex.org/W2963654596","https://openalex.org/W2963842088","https://openalex.org/W2996959725","https://openalex.org/W2997914278","https://openalex.org/W3034853385","https://openalex.org/W3049342604","https://openalex.org/W3081226161","https://openalex.org/W3087898974","https://openalex.org/W3102899483","https://openalex.org/W3116249021","https://openalex.org/W3157410348","https://openalex.org/W3159868125","https://openalex.org/W3173335915","https://openalex.org/W3173984942","https://openalex.org/W3206324923","https://openalex.org/W4212774754","https://openalex.org/W4214717370","https://openalex.org/W4296604485","https://openalex.org/W4321479987"],"related_works":["https://openalex.org/W4390273403","https://openalex.org/W4386781444","https://openalex.org/W2150182025","https://openalex.org/W3092950680","https://openalex.org/W3197542405","https://openalex.org/W2056712470","https://openalex.org/W3125580266","https://openalex.org/W4317039510","https://openalex.org/W4238861846","https://openalex.org/W790944756"],"abstract_inverted_index":{"Model-free":[0],"RL-based":[1,62,140],"recommender":[2,31,63,155],"systems":[3,64],"have":[4],"recently":[5],"received":[6],"increasing":[7],"research":[8,24],"attention":[9],"due":[10],"to":[11,14,91],"their":[12],"capability":[13],"handle":[15,92],"partial":[16],"feedback":[17,35,82,95,110],"and":[18],"long-term":[19],"rewards.":[20],"However,":[21],"most":[22],"existing":[23],"has":[25],"ignored":[26],"a":[27,75,85,115,147],"critical":[28],"feature":[29],"in":[30,53,74,84,88],"systems:":[32],"one":[33],"user's":[34],"on":[36,146],"the":[37,93,107,132,135],"same":[38],"item":[39],"at":[40],"different":[41,139],"times":[42],"is":[43],"random.":[44],"The":[45],"stochastic":[46,81,94,101,109],"rewards":[47],"property":[48],"essentially":[49],"differs":[50],"from":[51],"that":[52,105,112],"classic":[54],"RL":[55],"scenarios":[56],"with":[57,111,143],"deterministic":[58],"rewards,":[59],"which":[60],"makes":[61],"much":[65],"more":[66,96],"challenging.":[67],"In":[68],"this":[69],"paper,":[70],"we":[71,98],"first":[72],"demonstrate":[73,131],"simulator":[76,149],"environment":[77],"where":[78],"using":[79],"direct":[80,108],"results":[83],"significant":[86],"drop":[87],"performance.":[89],"Then":[90],"efficiently,":[97],"design":[99],"two":[100],"reward":[102],"stabilization":[103],"frameworks":[104,119,137],"replace":[106],"learned":[113],"by":[114],"supervised":[116,128],"model.":[117],"Both":[118],"are":[120],"model-agnostic,":[121],"i.e.,":[122],"they":[123],"can":[124],"effectively":[125],"utilize":[126],"various":[127],"models.":[129],"We":[130],"superiority":[133],"of":[134],"proposed":[136],"over":[138],"recommendation":[141,148],"baselines":[142],"extensive":[144],"experiments":[145],"as":[150,152],"well":[151],"an":[153],"industrial-level":[154],"system.":[156]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
