{"id":"https://openalex.org/W3004166557","doi":"https://doi.org/10.1145/3385670","title":"Safe Exploration for Optimizing Contextual Bandits","display_name":"Safe Exploration for Optimizing Contextual Bandits","publication_year":2020,"publication_date":"2020-04-21","ids":{"openalex":"https://openalex.org/W3004166557","doi":"https://doi.org/10.1145/3385670","mag":"3004166557"},"language":"en","primary_location":{"id":"doi:10.1145/3385670","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3385670","pdf_url":null,"source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2002.00467","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042795557","display_name":"Rolf Jagerman","orcid":"https://orcid.org/0000-0002-5169-495X"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Rolf Jagerman","raw_affiliation_strings":["University of Amsterdam, Amsterdam, The Netherlands","University of Amsterdam,Amsterdam,the Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I887064364"]},{"raw_affiliation_string":"University of Amsterdam,Amsterdam,the Netherlands","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103028673","display_name":"Ilya Markov","orcid":"https://orcid.org/0000-0001-9221-3043"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Ilya Markov","raw_affiliation_strings":["University of Amsterdam, Amsterdam, The Netherlands","University of Amsterdam,Amsterdam,the Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I887064364"]},{"raw_affiliation_string":"University of Amsterdam,Amsterdam,the Netherlands","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031439294","display_name":"Maarten de Rijke","orcid":"https://orcid.org/0000-0002-1086-0202"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Maarten De Rijke","raw_affiliation_strings":["University of Amsterdam, Amsterdam, The Netherlands","University of Amsterdam,Amsterdam,the Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-1086-0202","affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I887064364"]},{"raw_affiliation_string":"University of Amsterdam,Amsterdam,the Netherlands","institution_ids":["https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5042795557"],"corresponding_institution_ids":["https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01881084,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"38","issue":"3","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.8825806379318237},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.789522111415863},{"id":"https://openalex.org/keywords/harm","display_name":"Harm","score":0.7524411678314209},{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.6992303133010864},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6705907583236694},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6378898620605469},{"id":"https://openalex.org/keywords/learning-to-rank","display_name":"Learning to rank","score":0.6360836029052734},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5647122859954834},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.5509070158004761},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.541883111000061},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.5380122065544128},{"id":"https://openalex.org/keywords/multi-armed-bandit","display_name":"Multi-armed bandit","score":0.44272685050964355},{"id":"https://openalex.org/keywords/operations-research","display_name":"Operations research","score":0.3748883605003357},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14394885301589966},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.1406194269657135},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.12564757466316223},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11174613237380981},{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.09616836905479431},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.08639472723007202}],"concepts":[{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.8825806379318237},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.789522111415863},{"id":"https://openalex.org/C2777363581","wikidata":"https://www.wikidata.org/wiki/Q15098235","display_name":"Harm","level":2,"score":0.7524411678314209},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.6992303133010864},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6705907583236694},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6378898620605469},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.6360836029052734},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5647122859954834},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.5509070158004761},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.541883111000061},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.5380122065544128},{"id":"https://openalex.org/C123197309","wikidata":"https://www.wikidata.org/wiki/Q2882343","display_name":"Multi-armed bandit","level":3,"score":0.44272685050964355},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.3748883605003357},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14394885301589966},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.1406194269657135},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.12564757466316223},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11174613237380981},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.09616836905479431},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.08639472723007202},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":10,"locations":[{"id":"doi:10.1145/3385670","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3385670","pdf_url":null,"source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"},{"id":"pmh:oai:dare.uva.nl:openaire_cris_publications/a3b7560e-3dab-4dc4-baf7-a758ad01e072","is_oa":false,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/safe-exploration-for-optimizing-contextual-bandits(a3b7560e-3dab-4dc4-baf7-a758ad01e072).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Jagerman, R, Markov, I & de Rijke, M 2020, 'Safe Exploration for Optimizing Contextual Bandits', ACM Transactions on Information Systems, vol. 38, no. 3, 24. https://doi.org/10.1145/3385670","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:arXiv.org:2002.00467","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2002.00467","pdf_url":"https://arxiv.org/pdf/2002.00467","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:dare.uva.nl:publications/0db12b65-fdf9-47f8-9b3b-27fca4481a02","is_oa":true,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/safe-exploration-for-optimizing-contextual-bandits(0db12b65-fdf9-47f8-9b3b-27fca4481a02).html","pdf_url":"https://pure.uva.nl/ws/files/53920830/jagerman_2020_safe_arxiv.pdf","source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Jagerman, R, Markov, I & de Rijke, M 2020 'Safe Exploration for Optimizing Contextual Bandits' pp. 23. https://doi.org/10.48550/arXiv.2002.00467","raw_type":"info:eu-repo/semantics/preprint"},{"id":"pmh:oai:dare.uva.nl:openaire_cris_publications/0db12b65-fdf9-47f8-9b3b-27fca4481a02","is_oa":false,"landing_page_url":"https://hdl.handle.net/11245.1/0db12b65-fdf9-47f8-9b3b-27fca4481a02","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Jagerman, R, Markov, I & de Rijke, M 2020 'Safe Exploration for Optimizing Contextual Bandits' pp. 23. https://doi.org/10.48550/arXiv.2002.00467","raw_type":"info:eu-repo/semantics/preprint"},{"id":"pmh:oai:dare.uva.nl:publications/a3b7560e-3dab-4dc4-baf7-a758ad01e072","is_oa":false,"landing_page_url":"https://hdl.handle.net/11245.1/a3b7560e-3dab-4dc4-baf7-a758ad01e072","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Jagerman, R, Markov, I & de Rijke, M 2020, 'Safe Exploration for Optimizing Contextual Bandits', ACM Transactions on Information Systems, vol. 38, no. 3, 24. https://doi.org/10.1145/3385670","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:uvapub:oai:dare.uva.nl:publications/0db12b65-fdf9-47f8-9b3b-27fca4481a02","is_oa":true,"landing_page_url":"https://dare.uva.nl/personal/pure/en/publications/safe-exploration-for-optimizing-contextual-bandits(0db12b65-fdf9-47f8-9b3b-27fca4481a02).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"TITLE=None","raw_type":"info:eu-repo/semantics/other"},{"id":"pmh:uvapub:oai:dare.uva.nl:publications/a3b7560e-3dab-4dc4-baf7-a758ad01e072","is_oa":false,"landing_page_url":"https://dare.uva.nl/personal/pure/en/publications/safe-exploration-for-optimizing-contextual-bandits(a3b7560e-3dab-4dc4-baf7-a758ad01e072).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM Transactions on Information Systems, 38(3):24. Association for Computing Machinery (ACM)","raw_type":"info:eu-repo/semantics/article"},{"id":"doi:10.48550/arxiv.2002.00467","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2002.00467","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:3004166557","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2002.00467","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2002.00467","pdf_url":"https://arxiv.org/pdf/2002.00467","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/14","display_name":"Life below water","score":0.7799999713897705}],"awards":[{"id":"https://openalex.org/G2586376837","display_name":null,"funder_award_id":"612.001.551","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"}],"funders":[{"id":"https://openalex.org/F4320321182","display_name":"VSNU Vereniging van Universiteiten","ror":"https://ror.org/05wvdt748"},{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3004166557.pdf","grobid_xml":"https://content.openalex.org/works/W3004166557.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W1493526108","https://openalex.org/W1530210183","https://openalex.org/W1835900096","https://openalex.org/W1840625103","https://openalex.org/W1986842597","https://openalex.org/W2009979684","https://openalex.org/W2020160576","https://openalex.org/W2047221353","https://openalex.org/W2069870183","https://openalex.org/W2073248814","https://openalex.org/W2078957694","https://openalex.org/W2094790959","https://openalex.org/W2099471337","https://openalex.org/W2100659887","https://openalex.org/W2112420033","https://openalex.org/W2112430581","https://openalex.org/W2113065326","https://openalex.org/W2126714083","https://openalex.org/W2138909795","https://openalex.org/W2144870877","https://openalex.org/W2147892741","https://openalex.org/W2148224612","https://openalex.org/W2150102617","https://openalex.org/W2152314154","https://openalex.org/W2153170359","https://openalex.org/W2162059449","https://openalex.org/W2166253248","https://openalex.org/W2176930994","https://openalex.org/W2273088453","https://openalex.org/W2293743194","https://openalex.org/W2402441596","https://openalex.org/W2406454855","https://openalex.org/W2470088391","https://openalex.org/W2505872937","https://openalex.org/W2507134384","https://openalex.org/W2534277863","https://openalex.org/W2740384884","https://openalex.org/W2755613412","https://openalex.org/W2767455847","https://openalex.org/W2769473018","https://openalex.org/W2884475480","https://openalex.org/W2946248329","https://openalex.org/W2949880737","https://openalex.org/W2963099933","https://openalex.org/W2965151591","https://openalex.org/W3003609932","https://openalex.org/W3038071833","https://openalex.org/W3104013016","https://openalex.org/W3104881842","https://openalex.org/W4288280739","https://openalex.org/W4302322961"],"related_works":["https://openalex.org/W3099117208","https://openalex.org/W2581634139","https://openalex.org/W3185828874","https://openalex.org/W2909588145","https://openalex.org/W2158641818","https://openalex.org/W778742492","https://openalex.org/W588099057","https://openalex.org/W52170320","https://openalex.org/W2963124687","https://openalex.org/W3166204731","https://openalex.org/W3107549751","https://openalex.org/W3206282417","https://openalex.org/W603830301","https://openalex.org/W2907704766","https://openalex.org/W3131310681","https://openalex.org/W2998219625","https://openalex.org/W2902033370","https://openalex.org/W3005607450","https://openalex.org/W2563736478","https://openalex.org/W2097191079"],"abstract_inverted_index":{"Contextual":[0],"bandit":[1,29,80,261],"problems":[2,30],"are":[3],"a":[4,63,74,95,133,248],"natural":[5],"fit":[6],"for":[7,27,78,259],"many":[8],"information":[9],"retrieval":[10,240],"tasks,":[11],"such":[12],"as":[13,170,172],"learning":[14,25,76,130,257],"to":[15,62,114,123,131,150,185,191,228,253],"rank,":[16],"text":[17,236],"classification,":[18],"recommendation,":[19],"and":[20,238,255],"so":[21],"on.":[22],"However,":[23],"existing":[24],"methods":[26,258],"contextual":[28,79,260],"have":[31],"one":[32],"of":[33,43,140,154,162,175,196],"two":[34],"drawbacks:":[35],"They":[36],"either":[37],"do":[38],"not":[39,105,213],"explore":[40,193],"the":[41,54,69,88,107,138,141,152,155,160,163,173,176,182,197,207,215,221,242],"space":[42,223],"all":[44],"possible":[45],"document":[46,239],"rankings":[47,61],"(i.e.,":[48,101],"actions)":[49],"and,":[50,65,110,120,210,224],"thus,":[51,66,111,121,211,225],"may":[52,67],"miss":[53],"optimal":[55,231],"ranking,":[56],"or":[57],"they":[58],"present":[59],"suboptimal":[60,118],"user":[64,70,108,216],"harm":[68,106,214],"experience.":[71],"We":[72],"introduce":[73],"new":[75,134,183,187],"method":[77],"problems,":[81],"Safe":[82],"Exploration":[83],"Algorithm":[84],"(SEA),":[85],"which":[86,103],"overcomes":[87],"above":[89,243],"drawbacks.":[90],"SEA":[91,127,144,179,202,246],"starts":[92,180],"by":[93,244],"using":[94,181,235],"baseline":[96,142,177,208],"(or":[97],"production)":[98],"ranking":[99],"system":[100],"policy),":[102],"does":[104,212],"experience":[109],"is":[112,167],"safe":[113],"execute":[115,186],"but":[116],"has":[117],"performance":[119,153,161,174],"needs":[122],"be":[124],"improved.":[125],"Then":[126],"uses":[128,146],"counterfactual":[129],"learn":[132],"policy":[135,166,184,209],"based":[136],"on":[137],"behavior":[139],"policy.":[143,158,232],"also":[145],"high-confidence":[147],"off-policy":[148],"evaluation":[149],"estimate":[151],"newly":[156,164],"learned":[157,165],"Once":[159],"at":[168],"least":[169],"good":[171],"policy,":[178],"actions,":[188],"allowing":[189],"it":[190],"actively":[192],"favorable":[194],"regions":[195],"action":[198,222],"space.":[199],"This":[200],"way,":[201],"never":[203],"performs":[204],"worse":[205],"than":[206],"experience,":[217],"while":[218],"still":[219],"exploring":[220],"being":[226],"able":[227],"find":[229],"an":[230],"Our":[233],"experiments":[234],"classification":[237],"confirm":[241],"comparing":[245],"(and":[247],"boundless":[249],"variant":[250],"called":[251],"BSEA)":[252],"online":[254],"offline":[256],"problems.":[262]},"counts_by_year":[],"updated_date":"2026-06-06T09:05:17.133730","created_date":"2025-10-10T00:00:00"}
