{"id":"https://openalex.org/W2809162153","doi":"https://doi.org/10.1145/3219819.3220122","title":"Stabilizing Reinforcement Learning in Dynamic Environment with Application to Online Recommendation","display_name":"Stabilizing Reinforcement Learning in Dynamic Environment with Application to Online Recommendation","publication_year":2018,"publication_date":"2018-07-19","ids":{"openalex":"https://openalex.org/W2809162153","doi":"https://doi.org/10.1145/3219819.3220122","mag":"2809162153"},"language":"en","primary_location":{"id":"doi:10.1145/3219819.3220122","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3219819.3220122","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101922392","display_name":"Shiyong Chen","orcid":"https://orcid.org/0009-0005-6293-7792"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shi-Yong Chen","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100342263","display_name":"Yang Yu","orcid":"https://orcid.org/0000-0002-1732-9545"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Yu","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070849903","display_name":"Qing Da","orcid":"https://orcid.org/0000-0003-2200-0098"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qing Da","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100770589","display_name":"Jun Tan","orcid":"https://orcid.org/0000-0001-5827-4694"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Tan","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070798885","display_name":"Haikuan Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai-Kuan Huang","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114321317","display_name":"Hai-Hong Tang","orcid":"https://orcid.org/0000-0001-6797-2988"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai-Hong Tang","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101922392"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":13.8603,"has_fulltext":false,"cited_by_count":146,"citation_normalized_percentile":{"value":0.98950692,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1187","last_page":"1196"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9781000018119812,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9036585688591003},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8438920974731445},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.4660421907901764},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.45238780975341797},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.4469642639160156},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44046127796173096},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4299255311489105},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.08296212553977966}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9036585688591003},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8438920974731445},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.4660421907901764},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.45238780975341797},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.4469642639160156},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44046127796173096},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4299255311489105},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.08296212553977966},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3219819.3220122","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3219819.3220122","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1515851193","https://openalex.org/W1550736008","https://openalex.org/W1757796397","https://openalex.org/W2007376249","https://openalex.org/W2096698697","https://openalex.org/W2109711670","https://openalex.org/W2114986399","https://openalex.org/W2145339207","https://openalex.org/W2155968351","https://openalex.org/W2168405694","https://openalex.org/W2173564293","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2280163991","https://openalex.org/W2344944957","https://openalex.org/W2582946978","https://openalex.org/W2588664674","https://openalex.org/W2766447205","https://openalex.org/W2783573456","https://openalex.org/W2952152174","https://openalex.org/W2964108915","https://openalex.org/W3011120880","https://openalex.org/W4212774754"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W2964765435"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,21,45],"learning":[2,13,22,46],"has":[3],"shown":[4],"great":[5],"potential":[6],"in":[7,28,40,72,89,132,137,182],"improving":[8],"system":[9,136],"performance":[10,43,56,181],"autonomously,":[11],"by":[12],"from":[14,106],"iterations":[15],"with":[16,119],"the":[17,35,42,55,59,65,69,84,92,98,104,107,111,116,124,133,146,151,174,180],"environment.":[18,186],"However,":[19],"traditional":[20],"approaches":[23,47],"are":[24,37],"designed":[25],"to":[26,68,82,161],"work":[27],"static":[29],"environments.":[30,74],"In":[31,75],"many":[32],"real-world":[33,184],"problems,":[34],"environments":[36],"commonly":[38],"dynamic,":[39],"which":[41,102],"of":[44,54,64,150],"can":[48,171],"degrade":[49],"drastically.":[50],"A":[51],"direct":[52],"cause":[53],"degradation":[57],"is":[58],"high-variance":[60],"and":[61,97,110],"biased":[62],"estimation":[63,87,176],"reward,":[66,101],"due":[67],"distribution":[70],"shifting":[71],"dynamic":[73,90,148,185],"this":[76,183],"paper,":[77],"we":[78,122],"propose":[79,123],"two":[80,117],"techniques":[81,118],"alleviate":[83],"unstable":[85],"reward":[86,112],"problem":[88,105],"environments,":[91],"stratified":[93],"sampling":[94],"replay":[95],"strategy":[96],"approximate":[99],"regretted":[100],"address":[103],"sample":[108],"aspect":[109],"aspect,":[113],"respectively.":[114],"Integrating":[115],"Double":[120],"DQN,":[121],"Robust":[125,130,163,169],"DQN":[126,131,170],"method.":[127],"We":[128,143,154],"apply":[129],"tip":[134],"recommendation":[135,152],"Taobao":[138],"online":[139,158],"retail":[140],"trading":[141],"platform.":[142],"firstly":[144],"disclose":[145],"highly":[147],"property":[149],"application.":[153],"then":[155],"carried":[156],"out":[157],"A/B":[159],"test":[160],"examine":[162],"DQN.":[164],"The":[165],"results":[166],"show":[167],"that":[168],"effectively":[172],"stabilize":[173],"value":[175],"and,":[177],"therefore,":[178],"improves":[179]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":20},{"year":2022,"cited_by_count":19},{"year":2021,"cited_by_count":33},{"year":2020,"cited_by_count":21},{"year":2019,"cited_by_count":22},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
