{"id":"https://openalex.org/W3116249021","doi":"https://doi.org/10.1145/3437963.3441764","title":"User Response Models to Improve a REINFORCE Recommender System","display_name":"User Response Models to Improve a REINFORCE Recommender System","publication_year":2021,"publication_date":"2021-03-06","ids":{"openalex":"https://openalex.org/W3116249021","doi":"https://doi.org/10.1145/3437963.3441764","mag":"3116249021"},"language":"en","primary_location":{"id":"doi:10.1145/3437963.3441764","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3437963.3441764","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3437963.3441764","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3437963.3441764","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100699702","display_name":"Minmin Chen","orcid":"https://orcid.org/0000-0002-7342-9022"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Minmin Chen","raw_affiliation_strings":["Google, Inc., Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google, Inc., Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033769984","display_name":"Bo Chang","orcid":"https://orcid.org/0000-0001-7429-7212"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bo Chang","raw_affiliation_strings":["Google, Inc., Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google, Inc., Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101493343","display_name":"Can Xu","orcid":"https://orcid.org/0000-0002-4254-8678"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Can Xu","raw_affiliation_strings":["Google, Inc., Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google, Inc., Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028125399","display_name":"Ed H.","orcid":"https://orcid.org/0000-0003-3230-5338"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ed H. Chi","raw_affiliation_strings":["Google, Inc., Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google, Inc., Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100699702"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":6.7973,"has_fulltext":true,"cited_by_count":43,"citation_normalized_percentile":{"value":0.97108743,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"121","last_page":"129"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.8801133632659912},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.856299877166748},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7618395090103149},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5729281306266785},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5520610213279724},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4726925194263458},{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.43931156396865845},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3893628716468811},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.388834685087204},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3842352330684662}],"concepts":[{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.8801133632659912},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.856299877166748},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7618395090103149},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5729281306266785},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5520610213279724},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4726925194263458},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.43931156396865845},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3893628716468811},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.388834685087204},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3842352330684662},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3437963.3441764","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3437963.3441764","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3437963.3441764","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3437963.3441764","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3437963.3441764","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3437963.3441764","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5400000214576721,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3116249021.pdf","grobid_xml":"https://content.openalex.org/works/W3116249021.grobid-xml"},"referenced_works_count":47,"referenced_works":["https://openalex.org/W192920577","https://openalex.org/W1835900096","https://openalex.org/W1924770834","https://openalex.org/W2042281163","https://openalex.org/W2054141820","https://openalex.org/W2064675550","https://openalex.org/W2100664567","https://openalex.org/W2111355007","https://openalex.org/W2119717200","https://openalex.org/W2120346334","https://openalex.org/W2138108551","https://openalex.org/W2160308170","https://openalex.org/W2188353343","https://openalex.org/W2215378786","https://openalex.org/W2312609093","https://openalex.org/W2340679559","https://openalex.org/W2512971201","https://openalex.org/W2560647685","https://openalex.org/W2766447205","https://openalex.org/W2769112066","https://openalex.org/W2784068709","https://openalex.org/W2788388592","https://openalex.org/W2891303672","https://openalex.org/W2900152462","https://openalex.org/W2902572901","https://openalex.org/W2906424389","https://openalex.org/W2921980263","https://openalex.org/W2948345531","https://openalex.org/W2949608212","https://openalex.org/W2950872548","https://openalex.org/W2962694510","https://openalex.org/W2962736495","https://openalex.org/W2963303028","https://openalex.org/W2963519394","https://openalex.org/W2964108915","https://openalex.org/W2965512832","https://openalex.org/W2970168598","https://openalex.org/W2971262355","https://openalex.org/W3003416843","https://openalex.org/W3003609932","https://openalex.org/W3022566517","https://openalex.org/W3099420497","https://openalex.org/W3122507327","https://openalex.org/W4225636568","https://openalex.org/W4293585414","https://openalex.org/W6630221451","https://openalex.org/W6677737365"],"related_works":["https://openalex.org/W4390273403","https://openalex.org/W4386781444","https://openalex.org/W2150182025","https://openalex.org/W3092950680","https://openalex.org/W3197542405","https://openalex.org/W2056712470","https://openalex.org/W3125580266","https://openalex.org/W4288390103","https://openalex.org/W4317039510","https://openalex.org/W4238861846"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"techniques":[3],"have":[4,35],"been":[5],"sought":[6],"after":[7],"as":[8],"the":[9,15,51,61,82,110,147,150,156,171,176],"next-generation":[10],"tools":[11],"to":[12,36,72,108,127,145,169,216],"further":[13],"advance":[14],"field":[16],"of":[17,24,53,63,85,90,112,149,178,191,193,208,212,214],"recommendation":[18,33,204],"research.":[19],"Different":[20],"from":[21],"classic":[22],"applications":[23],"RL,":[25],"recommender":[26,98,157],"agents,":[27],"especially":[28],"those":[29],"deployed":[30],"on":[31,165,201],"commercial":[32],"platforms,":[34],"operate":[37],"in":[38,50,60,79,181],"extremely":[39,77],"large":[40],"state":[41,151],"and":[42,55,152,185,210],"action":[43,153],"spaces,":[44],"serving":[45,206],"a":[46,56,105,162],"dynamic":[47],"user":[48,69,142,194],"base":[49],"order":[52,62],"billions,":[54],"long-tail":[57],"item":[58],"corpus":[59],"millions":[64,192,213],"or":[65,137],"billions.":[66],"The":[67],"(positive)":[68],"feedback":[70],"available":[71],"train":[73],"such":[74],"agents":[75,96,115],"is":[76,88],"scarce":[78],"retrospect.":[80],"Improving":[81],"sample":[83,121],"efficiency":[84],"RL":[86,95,114],"algorithms":[87],"thus":[89],"paramount":[91],"importance":[92],"when":[93],"developing":[94],"for":[97,119,155],"systems.":[99],"In":[100],"this":[101],"work,":[102],"we":[103,125],"present":[104],"general":[106],"framework":[107],"augment":[109],"training":[111],"model-free":[113],"with":[116],"auxiliary":[117],"tasks":[118,130],"improved":[120],"efficiency.":[122],"More":[123],"specifically,":[124],"opt":[126],"add":[128],"additional":[129],"that":[131],"predict":[132],"users'":[133],"immediate":[134],"responses":[135],"(positive":[136],"negative)":[138],"toward":[139],"recommendations,":[140],"i.e.,":[141],"response":[143],"modeling,":[144],"enhance":[146],"learning":[148,184],"representations":[154],"agents.":[158],"We":[159,174,196],"also":[160,197],"introduce":[161],"tool":[163],"based":[164],"gradient":[166],"correlation":[167],"analysis":[168],"guide":[170],"model":[172],"design.":[173],"showcase":[175],"efficacy":[177],"our":[179],"method":[180],"offline":[182],"experiments,":[183],"evaluating":[186],"agent":[187],"policies":[188],"over":[189],"hundreds":[190],"trajectories.":[195],"conduct":[198],"live":[199],"experiments":[200],"an":[202],"industrial":[203],"platform":[205],"billions":[207],"users":[209],"tens":[211],"items":[215],"verify":[217],"its":[218],"benefit.":[219]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":15},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":3}],"updated_date":"2026-02-27T16:54:17.756197","created_date":"2025-10-10T00:00:00"}
