{"id":"https://openalex.org/W2996959725","doi":"https://doi.org/10.1145/3336191.3371801","title":"Pseudo Dyna-Q","display_name":"Pseudo Dyna-Q","publication_year":2020,"publication_date":"2020-01-20","ids":{"openalex":"https://openalex.org/W2996959725","doi":"https://doi.org/10.1145/3336191.3371801","mag":"2996959725"},"language":"en","primary_location":{"id":"doi:10.1145/3336191.3371801","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3336191.3371801","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089307887","display_name":"Lixin Zou","orcid":"https://orcid.org/0000-0001-6755-871X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lixin Zou","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103251082","display_name":"Long Xia","orcid":"https://orcid.org/0000-0003-2580-6206"},"institutions":[{"id":"https://openalex.org/I192455969","display_name":"York University","ror":"https://ror.org/05fq50484","country_code":"CA","type":"education","lineage":["https://openalex.org/I192455969"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Long Xia","raw_affiliation_strings":["York University, Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"York University, Toronto, Canada","institution_ids":["https://openalex.org/I192455969"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103222554","display_name":"Pan Du","orcid":"https://orcid.org/0000-0002-4229-0902"},"institutions":[{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Pan Du","raw_affiliation_strings":["University of Montreal, Montreal, Canada"],"affiliations":[{"raw_affiliation_string":"University of Montreal, Montreal, Canada","institution_ids":["https://openalex.org/I70931966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100429710","display_name":"Zhuo Zhang","orcid":"https://orcid.org/0000-0002-6515-0021"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhuo Zhang","raw_affiliation_strings":["University of Melbourne, Melbourne, Australia"],"affiliations":[{"raw_affiliation_string":"University of Melbourne, Melbourne, Australia","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101588984","display_name":"Ting Bai","orcid":"https://orcid.org/0009-0001-7603-451X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ting Bai","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100687147","display_name":"Weidong Liu","orcid":"https://orcid.org/0000-0002-2276-2159"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weidong Liu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018977183","display_name":"Jian\u2010Yun Nie","orcid":"https://orcid.org/0000-0003-1556-3335"},"institutions":[{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jian-Yun Nie","raw_affiliation_strings":["University of Montreal, Montreal, Canada"],"affiliations":[{"raw_affiliation_string":"University of Montreal, Montreal, Canada","institution_ids":["https://openalex.org/I70931966"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101771060","display_name":"Dawei Yin","orcid":"https://orcid.org/0000-0002-0684-6205"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Yin","raw_affiliation_strings":["JD Data Science Lab, Beijing, China"],"affiliations":[{"raw_affiliation_string":"JD Data Science Lab, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5089307887"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":23.1268,"has_fulltext":false,"cited_by_count":108,"citation_normalized_percentile":{"value":0.99456083,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"816","last_page":"824"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.830409049987793},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7863377332687378},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.6687383055686951},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5877875685691833},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5732006430625916},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.550642728805542},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5336834788322449},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.43644025921821594},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.41388657689094543},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38537919521331787},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.2669306993484497},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.12146136164665222}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.830409049987793},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7863377332687378},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.6687383055686951},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5877875685691833},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5732006430625916},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.550642728805542},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5336834788322449},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43644025921821594},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.41388657689094543},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38537919521331787},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2669306993484497},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.12146136164665222},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3336191.3371801","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3336191.3371801","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th International Conference on Web Search and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.41999998688697815,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1757796397","https://openalex.org/W1980035368","https://openalex.org/W2054141820","https://openalex.org/W2086206379","https://openalex.org/W2112420033","https://openalex.org/W2118718620","https://openalex.org/W2121863487","https://openalex.org/W2132314908","https://openalex.org/W2138108551","https://openalex.org/W2165905123","https://openalex.org/W2171279286","https://openalex.org/W2215378786","https://openalex.org/W2253995343","https://openalex.org/W2257979135","https://openalex.org/W2295739661","https://openalex.org/W2358698356","https://openalex.org/W2400213106","https://openalex.org/W2475334473","https://openalex.org/W2494566063","https://openalex.org/W2604639157","https://openalex.org/W2604822632","https://openalex.org/W2746011824","https://openalex.org/W2766447205","https://openalex.org/W2782696945","https://openalex.org/W2783272285","https://openalex.org/W2783944588","https://openalex.org/W2787933113","https://openalex.org/W2788295351","https://openalex.org/W2798494119","https://openalex.org/W2798984840","https://openalex.org/W2799544270","https://openalex.org/W2886601525","https://openalex.org/W2941385591","https://openalex.org/W2950382198","https://openalex.org/W2951008357","https://openalex.org/W2951570486","https://openalex.org/W2952613481","https://openalex.org/W2954765895","https://openalex.org/W2962802563","https://openalex.org/W2963250930","https://openalex.org/W2963403868","https://openalex.org/W2963842088","https://openalex.org/W2964044287","https://openalex.org/W2964965220","https://openalex.org/W2965512832","https://openalex.org/W3102778384","https://openalex.org/W4240768087","https://openalex.org/W4293585414","https://openalex.org/W6630221451"],"related_works":["https://openalex.org/W4390273403","https://openalex.org/W4386781444","https://openalex.org/W2150182025","https://openalex.org/W3092950680","https://openalex.org/W3197542405","https://openalex.org/W3125580266","https://openalex.org/W44246808","https://openalex.org/W4317039510","https://openalex.org/W4238861846","https://openalex.org/W790944756"],"abstract_inverted_index":{"Applying":[0],"reinforcement":[1],"learning":[2,25,72],"(RL)":[3],"in":[4],"recommender":[5,42],"systems":[6],"is":[7,38,121,141],"attractive":[8],"but":[9,172],"costly":[10],"due":[11],"to":[12,39,55,110,115,123,148],"the":[13,16,56,65,117,125,129,138,149,155,161,194,206],"constraint":[14],"of":[15,58,103,132,163,169,186,189,208],"interaction":[17],"with":[18,28,105],"real":[19,29,106,179],"customers,":[20,107],"where":[21],"performing":[22],"online":[23],"policy":[24,63,136,197],"through":[26],"interacting":[27,104],"customers":[30],"usually":[31],"harms":[32],"customer":[33,112],"experiences.":[34],"A":[35],"practical":[36],"alternative":[37],"build":[40],"a":[41,111,182],"agent":[43],"offline":[44,53,71,196],"from":[45],"logged":[46,51,133],"data,":[47],"whereas":[48],"directly":[49],"using":[50],"data":[52],"leads":[54],"problem":[57],"selection":[59,130],"bias":[60,131,200],"between":[61],"logging":[62],"and":[64,79,127,144,165,201],"recommendation":[66,151],"policy.":[67,152],"The":[68],"existing":[69,170],"direct":[70],"algorithms,":[73],"such":[74],"as":[75,116],"Monte":[76],"Carlo":[77],"methods":[78,82],"temporal":[80],"difference":[81],"are":[83],"either":[84],"computationally":[85],"expensive":[86],"or":[87],"unstable":[88],"on":[89,210],"convergence.":[90],"To":[91],"address":[92],"these":[93],"issues,":[94],"we":[95,108],"propose":[96],"Pseudo":[97],"Dyna-Q":[98],"(PDQ).":[99],"In":[100],"PDQ,":[101],"instead":[102],"resort":[109],"simulator,":[113],"referred":[114],"World":[118,139],"Model,":[119],"which":[120],"designed":[122],"simulate":[124],"environment":[126],"handle":[128],"data.":[134],"During":[135],"improvement,":[137],"Model":[140],"constantly":[142],"updated":[143],"optimized":[145],"adaptively,":[146],"according":[147],"current":[150],"This":[153],"way,":[154],"proposed":[156],"PDQ":[157,209],"not":[158],"only":[159],"avoids":[160],"instability":[162],"convergence":[164],"high":[166],"computation":[167],"cost":[168],"approaches":[171],"also":[173],"provides":[174],"unlimited":[175],"interactions":[176],"without":[177],"involving":[178],"customers.":[180],"Moreover,":[181],"proved":[183],"upper":[184],"bound":[185],"empirical":[187],"error":[188],"reward":[190],"function":[191],"guarantees":[192],"that":[193],"learned":[195],"has":[198],"lower":[199],"variance.":[202],"Extensive":[203],"experiments":[204],"demonstrated":[205],"advantages":[207],"two":[211],"real-world":[212],"datasets":[213],"against":[214],"state-of-the-arts":[215],"methods.":[216]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":23},{"year":2022,"cited_by_count":26},{"year":2021,"cited_by_count":26},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":2}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2020-01-10T00:00:00"}
