{"id":"https://openalex.org/W4403221398","doi":"https://doi.org/10.1145/3640457.3688105","title":"Optimal Baseline Corrections for Off-Policy Contextual Bandits","display_name":"Optimal Baseline Corrections for Off-Policy Contextual Bandits","publication_year":2024,"publication_date":"2024-10-08","ids":{"openalex":"https://openalex.org/W4403221398","doi":"https://doi.org/10.1145/3640457.3688105"},"language":"en","primary_location":{"id":"doi:10.1145/3640457.3688105","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3640457.3688105","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3640457.3688105","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"18th ACM Conference on Recommender Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3640457.3688105","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101995843","display_name":"Shashank Gupta","orcid":"https://orcid.org/0000-0003-1291-7951"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Shashank Gupta","raw_affiliation_strings":["IRLab, University of Amsterdam, The Netherlands, Netherlands"],"affiliations":[{"raw_affiliation_string":"IRLab, University of Amsterdam, The Netherlands, Netherlands","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083237271","display_name":"Olivier Jeunen","orcid":"https://orcid.org/0000-0001-6256-5814"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olivier Jeunen","raw_affiliation_strings":["AI, ShareChat, United Kingdom"],"affiliations":[{"raw_affiliation_string":"AI, ShareChat, United Kingdom","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002072527","display_name":"Harrie Oosterhuis","orcid":"https://orcid.org/0000-0002-0458-9233"},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Harrie Oosterhuis","raw_affiliation_strings":["Institute for Computing and Information Sciences, Radboud University, Netherlands"],"affiliations":[{"raw_affiliation_string":"Institute for Computing and Information Sciences, Radboud University, Netherlands","institution_ids":["https://openalex.org/I145872427"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031439294","display_name":"Maarten de Rijke","orcid":"https://orcid.org/0000-0002-1086-0202"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Maarten de Rijke","raw_affiliation_strings":["Informatics Institute, University of Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"Informatics Institute, University of Amsterdam, Netherlands","institution_ids":["https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101995843"],"corresponding_institution_ids":["https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":2.9973,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.91659978,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"722","last_page":"732"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.975600004196167,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.8555086851119995},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5694091320037842},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.14887017011642456}],"concepts":[{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.8555086851119995},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5694091320037842},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.14887017011642456},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3640457.3688105","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3640457.3688105","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3640457.3688105","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"18th ACM Conference on Recommender Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.ubn.ru.nl:2066/312217","is_oa":true,"landing_page_url":"https://hdl.handle.net/2066/312217","pdf_url":"https://repository.ubn.ru.nl//bitstream/handle/2066/312217/312217.pdf","source":{"id":"https://openalex.org/S4306401067","display_name":"Radboud Repository (Radboud University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I145872427","host_organization_name":"Radboud University Nijmegen","host_organization_lineage":["https://openalex.org/I145872427"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article in monograph or in proceedings"}],"best_oa_location":{"id":"doi:10.1145/3640457.3688105","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3640457.3688105","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3640457.3688105","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"18th ACM Conference on Recommender Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2289169492","display_name":"Fairness and Intersectional Non-Discrimination in Human Recommendation","funder_award_id":"101070212","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5550786340","display_name":null,"funder_award_id":"024.004.022","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G6009445997","display_name":null,"funder_award_id":"unknown","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G6280615682","display_name":"Low Resource Chat-based Conversational Intelligence","funder_award_id":"NWA.1389.20.183","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G629491556","display_name":null,"funder_award_id":"(NWO)","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G6640760388","display_name":null,"funder_award_id":"KICH3.LTP.20.006","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"},{"id":"https://openalex.org/F4320334322","display_name":"HORIZON EUROPE Framework Programme","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403221398.pdf"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W1998427280","https://openalex.org/W2045745608","https://openalex.org/W2086206379","https://openalex.org/W2112420033","https://openalex.org/W2119717200","https://openalex.org/W2126002144","https://openalex.org/W2148472246","https://openalex.org/W2463677609","https://openalex.org/W2517816274","https://openalex.org/W2783688698","https://openalex.org/W2893370267","https://openalex.org/W2902572901","https://openalex.org/W2955497741","https://openalex.org/W3003609932","https://openalex.org/W3012881846","https://openalex.org/W3042645155","https://openalex.org/W3080077280","https://openalex.org/W3080116024","https://openalex.org/W3081226161","https://openalex.org/W3085555662","https://openalex.org/W3089214483","https://openalex.org/W3098366254","https://openalex.org/W3098679278","https://openalex.org/W3099464630","https://openalex.org/W3116249021","https://openalex.org/W3121535423","https://openalex.org/W3197958031","https://openalex.org/W3200739262","https://openalex.org/W3201265114","https://openalex.org/W3201286590","https://openalex.org/W3201310492","https://openalex.org/W4214717370","https://openalex.org/W4239207475","https://openalex.org/W4288310870","https://openalex.org/W4290877189","https://openalex.org/W4290931133","https://openalex.org/W4296604485","https://openalex.org/W4296604501","https://openalex.org/W4307302081","https://openalex.org/W4367860606","https://openalex.org/W4385568167","https://openalex.org/W4385688690","https://openalex.org/W4386730467","https://openalex.org/W4392367441","https://openalex.org/W4392384534","https://openalex.org/W4392384882","https://openalex.org/W4393074693","https://openalex.org/W4401863302","https://openalex.org/W6703949738"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W2913146933","https://openalex.org/W2372385138","https://openalex.org/W4296359239","https://openalex.org/W2101155126"],"abstract_inverted_index":{"The":[0],"off-policy":[1],"learning":[2],"paradigm":[3],"allows":[4],"for":[5],"recommender":[6],"systems":[7],"and":[8,42],"general":[9],"ranking":[10],"applications":[11],"to":[12,21,46],"be":[13],"framed":[14],"as":[15],"decision-making":[16],"problems,":[17],"where":[18],"we":[19],"aim":[20],"learn":[22],"decision":[23],"policies":[24],"that":[25],"optimize":[26],"an":[27,32],"unbiased":[28],"offline":[29],"estimate":[30],"of":[31,55],"online":[33],"reward":[34],"metric.":[35],"With":[36],"unbiasedness":[37],"comes":[38],"potentially":[39],"high":[40],"variance,":[41],"prevalent":[43],"methods":[44,51],"exist":[45],"reduce":[47],"estimation":[48],"variance.":[49],"These":[50],"typically":[52],"make":[53],"use":[54],"control":[56],"variates,":[57],"either":[58],"additive":[59],"(i.e.,":[60,69],"baseline":[61],"corrections":[62],"or":[63,67],"doubly":[64],"robust":[65],"methods)":[66],"multiplicative":[68],"self-normalisation).":[70]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-22T08:09:32.410652","created_date":"2025-10-10T00:00:00"}
