{"id":"https://openalex.org/W3201286590","doi":"https://doi.org/10.1145/3460231.3474247","title":"Pessimistic Reward Models for Off-Policy Learning in Recommendation","display_name":"Pessimistic Reward Models for Off-Policy Learning in Recommendation","publication_year":2021,"publication_date":"2021-09-13","ids":{"openalex":"https://openalex.org/W3201286590","doi":"https://doi.org/10.1145/3460231.3474247","mag":"3201286590"},"language":"en","primary_location":{"id":"doi:10.1145/3460231.3474247","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3460231.3474247","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Fifteenth ACM Conference on Recommender Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083237271","display_name":"Olivier Jeunen","orcid":"https://orcid.org/0000-0001-6256-5814"},"institutions":[{"id":"https://openalex.org/I149213910","display_name":"University of Antwerp","ror":"https://ror.org/008x57b05","country_code":"BE","type":"education","lineage":["https://openalex.org/I149213910"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Olivier Jeunen","raw_affiliation_strings":["University of Antwerp, Belgium"],"affiliations":[{"raw_affiliation_string":"University of Antwerp, Belgium","institution_ids":["https://openalex.org/I149213910"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020321444","display_name":"Bart Goethals","orcid":"https://orcid.org/0000-0001-9327-9554"},"institutions":[{"id":"https://openalex.org/I149213910","display_name":"University of Antwerp","ror":"https://ror.org/008x57b05","country_code":"BE","type":"education","lineage":["https://openalex.org/I149213910"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Bart Goethals","raw_affiliation_strings":["University of Antwerp, Belgium"],"affiliations":[{"raw_affiliation_string":"University of Antwerp, Belgium","institution_ids":["https://openalex.org/I149213910"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5083237271"],"corresponding_institution_ids":["https://openalex.org/I149213910"],"apc_list":null,"apc_paid":null,"fwci":5.054,"has_fulltext":false,"cited_by_count":35,"citation_normalized_percentile":{"value":0.95564377,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"63","last_page":"74"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7993685007095337},{"id":"https://openalex.org/keywords/pessimism","display_name":"Pessimism","score":0.669223427772522},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.6546323299407959},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6485574245452881},{"id":"https://openalex.org/keywords/action-selection","display_name":"Action selection","score":0.5909422039985657},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5658385753631592},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5449449419975281},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.5328899621963501},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5305379033088684},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5247864127159119},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4722389876842499},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.44960692524909973},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.08808845281600952}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7993685007095337},{"id":"https://openalex.org/C9992130","wikidata":"https://www.wikidata.org/wiki/Q484954","display_name":"Pessimism","level":2,"score":0.669223427772522},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.6546323299407959},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6485574245452881},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.5909422039985657},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5658385753631592},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5449449419975281},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.5328899621963501},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5305379033088684},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5247864127159119},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4722389876842499},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.44960692524909973},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.08808845281600952},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3460231.3474247","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3460231.3474247","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Fifteenth ACM Conference on Recommender Systems","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321587","display_name":"Vlaamse Overheid","ror":"https://ror.org/04qxsrb28"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":87,"referenced_works":["https://openalex.org/W1575236038","https://openalex.org/W1618543586","https://openalex.org/W1786332878","https://openalex.org/W1809653203","https://openalex.org/W1987431925","https://openalex.org/W2034368206","https://openalex.org/W2074694452","https://openalex.org/W2076618162","https://openalex.org/W2086206379","https://openalex.org/W2108738385","https://openalex.org/W2110654099","https://openalex.org/W2112420033","https://openalex.org/W2122124659","https://openalex.org/W2127186087","https://openalex.org/W2159309155","https://openalex.org/W2163162311","https://openalex.org/W2179435707","https://openalex.org/W2188353343","https://openalex.org/W2280163991","https://openalex.org/W2463645429","https://openalex.org/W2507134384","https://openalex.org/W2511414107","https://openalex.org/W2558073743","https://openalex.org/W2765564115","https://openalex.org/W2785875001","https://openalex.org/W2797563284","https://openalex.org/W2804216207","https://openalex.org/W2809290718","https://openalex.org/W2886601525","https://openalex.org/W2893370267","https://openalex.org/W2897955056","https://openalex.org/W2902572901","https://openalex.org/W2908670005","https://openalex.org/W2911802745","https://openalex.org/W2912745432","https://openalex.org/W2941212468","https://openalex.org/W2944832664","https://openalex.org/W2950382198","https://openalex.org/W2955421345","https://openalex.org/W2962785510","https://openalex.org/W2962818688","https://openalex.org/W2963085847","https://openalex.org/W2963489350","https://openalex.org/W2964059111","https://openalex.org/W2966283211","https://openalex.org/W2972520532","https://openalex.org/W2972675851","https://openalex.org/W2973172293","https://openalex.org/W2974036112","https://openalex.org/W2997352934","https://openalex.org/W2997617192","https://openalex.org/W3009804075","https://openalex.org/W3012881846","https://openalex.org/W3022566517","https://openalex.org/W3025606523","https://openalex.org/W3028135017","https://openalex.org/W3028766998","https://openalex.org/W3033324992","https://openalex.org/W3034606136","https://openalex.org/W3034853385","https://openalex.org/W3042645155","https://openalex.org/W3044938582","https://openalex.org/W3049157983","https://openalex.org/W3080077280","https://openalex.org/W3081226161","https://openalex.org/W3081410692","https://openalex.org/W3084805822","https://openalex.org/W3087931390","https://openalex.org/W3088365077","https://openalex.org/W3088744629","https://openalex.org/W3098638686","https://openalex.org/W3099117208","https://openalex.org/W3099420497","https://openalex.org/W3099464630","https://openalex.org/W3101935024","https://openalex.org/W3103383763","https://openalex.org/W3104349857","https://openalex.org/W3105114834","https://openalex.org/W3105272967","https://openalex.org/W3106195168","https://openalex.org/W3114509039","https://openalex.org/W3117945726","https://openalex.org/W3121902971","https://openalex.org/W3153935502","https://openalex.org/W3173720884","https://openalex.org/W4310299640","https://openalex.org/W4385628968"],"related_works":["https://openalex.org/W4380987628","https://openalex.org/W2418537576","https://openalex.org/W2557514562","https://openalex.org/W214945085","https://openalex.org/W1987935396","https://openalex.org/W3126025002","https://openalex.org/W2354456418","https://openalex.org/W3197683035","https://openalex.org/W2006807542","https://openalex.org/W2077555735"],"abstract_inverted_index":{"Methods":[0],"for":[1,21,43],"bandit":[2],"learning":[3,34,88],"from":[4],"user":[5],"interactions":[6],"often":[7,48],"require":[8],"a":[9,14,26,29,74],"model":[10],"of":[11,25],"the":[12,23,40,51,57,90],"reward":[13,81],"certain":[15],"context-action":[16],"pair":[17],"will":[18],"yield":[19],"\u2013":[20,89,95],"example,":[22],"probability":[24],"click":[27],"on":[28],"recommendation.":[30],"This":[31,83],"common":[32],"machine":[33],"task":[35],"is":[36,47],"highly":[37],"non-trivial,":[38],"as":[39],"data-generating":[41],"process":[42],"contexts":[44],"and":[45],"actions":[46,69],"skewed":[49],"by":[50],"recommender":[52],"system":[53],"itself.":[54],"Indeed,":[55],"when":[56],"deployed":[58],"recommendation":[59],"policy":[60],"at":[61],"data":[62],"collection":[63],"time":[64],"does":[65],"not":[66],"pick":[67],"its":[68],"uniformly-at-random,":[70],"this":[71],"leads":[72],"to":[73],"selection":[75],"bias":[76],"that":[77],"can":[78],"impede":[79],"effective":[80],"modelling.":[82],"in":[84,93],"turn":[85],"makes":[86],"off-policy":[87],"typical":[91],"setup":[92],"industry":[94],"particularly":[96],"challenging.":[97]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":8}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
