{"id":"https://openalex.org/W4409158543","doi":"https://doi.org/10.1145/3690624.3709237","title":"Off-Policy Evaluation and Learning for the Future under Non-Stationarity","display_name":"Off-Policy Evaluation and Learning for the Future under Non-Stationarity","publication_year":2025,"publication_date":"2025-04-04","ids":{"openalex":"https://openalex.org/W4409158543","doi":"https://doi.org/10.1145/3690624.3709237"},"language":"en","primary_location":{"id":"doi:10.1145/3690624.3709237","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3690624.3709237","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035138146","display_name":"Tatsuhiro Shimizu","orcid":"https://orcid.org/0009-0009-9746-3346"},"institutions":[{"id":"https://openalex.org/I32971472","display_name":"Yale University","ror":"https://ror.org/03v76x132","country_code":"US","type":"education","lineage":["https://openalex.org/I32971472"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tatsuhiro Shimizu","raw_affiliation_strings":["Yale University, New Haven, Connecticut, USA"],"affiliations":[{"raw_affiliation_string":"Yale University, New Haven, Connecticut, USA","institution_ids":["https://openalex.org/I32971472"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087364594","display_name":"Kazuki Kawamura","orcid":"https://orcid.org/0000-0002-5181-320X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kazuki Kawamura","raw_affiliation_strings":["Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051013898","display_name":"Takanori Muroi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Takanori Muroi","raw_affiliation_strings":["Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102762244","display_name":"Yusuke Narita","orcid":"https://orcid.org/0000-0003-0314-3384"},"institutions":[{"id":"https://openalex.org/I32971472","display_name":"Yale University","ror":"https://ror.org/03v76x132","country_code":"US","type":"education","lineage":["https://openalex.org/I32971472"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yusuke Narita","raw_affiliation_strings":["Yale University, New Haven, CT, USA"],"affiliations":[{"raw_affiliation_string":"Yale University, New Haven, CT, USA","institution_ids":["https://openalex.org/I32971472"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099784235","display_name":"Kei Tateno","orcid":"https://orcid.org/0009-0000-8249-2659"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kei Tateno","raw_affiliation_strings":["Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067751383","display_name":"Takuma Udagawa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Takuma Udagawa","raw_affiliation_strings":["Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101991694","display_name":"Yuta Saito","orcid":"https://orcid.org/0000-0003-4357-5835"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuta Saito","raw_affiliation_strings":["Cornell University, Ithaca, NY, USA"],"affiliations":[{"raw_affiliation_string":"Cornell University, Ithaca, NY, USA","institution_ids":["https://openalex.org/I205783295"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5035138146"],"corresponding_institution_ids":["https://openalex.org/I32971472"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07401458,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1256","last_page":"1264"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6388059854507446},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43742191791534424},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3922995328903198}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6388059854507446},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43742191791534424},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3922995328903198}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3690624.3709237","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3690624.3709237","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W2605323209","https://openalex.org/W2906762886","https://openalex.org/W2990138404","https://openalex.org/W3001218531","https://openalex.org/W3033324992","https://openalex.org/W3099117208","https://openalex.org/W3101234416","https://openalex.org/W3200739262","https://openalex.org/W3201286590","https://openalex.org/W4213113302","https://openalex.org/W4242771733","https://openalex.org/W4292423901","https://openalex.org/W4382203490","https://openalex.org/W4396723171","https://openalex.org/W6601804787"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"We":[0],"study":[1],"the":[2,17,43,47,58,69,76,106,131,145,163,171,191,235],"novel":[3,100],"problem":[4],"of":[5,20,124,182],"future":[6,18,70,118,138,166,215,236],"off-policy":[7],"evaluation":[8],"(F-OPE)":[9],"and":[10,15,41,165,233],"learning":[11],"(F-OPL)":[12],"for":[13,33,46,105,111,151,241],"estimating":[14,113,232],"optimizing":[16,234],"value":[19,45,238],"policies":[21],"in":[22,57,75,144,161,231],"non-stationary":[23],"environments,":[24],"where":[25],"distributions":[26],"vary":[27],"over":[28],"time.":[29],"In":[30,198],"e-commerce":[31],"recommendations,":[32],"instance,":[34],"our":[35,202,225],"goal":[36],"is":[37,64,72,126,170],"often":[38],"to":[39,68,90,129,173,204,210],"estimate":[40],"optimize":[42],"policy":[44,56,114,216,237],"upcoming":[48],"month":[49],"using":[50,217],"data":[51,66,139],"collected":[52],"by":[53],"an":[54],"old":[55],"previous":[59],"month.":[60],"A":[61],"critical":[62],"challenge":[63],"that":[65,158,224],"related":[67],"environment":[71],"not":[73,141],"observed":[74],"historical":[77,146,164,219],"data.":[78,136,167,220],"Existing":[79],"methods":[80,226,230],"assume":[81],"stationarity":[82],"or":[83,155],"depend":[84],"on":[85],"restrictive":[86],"reward-modeling":[87],"assumptions,":[88],"leading":[89],"significant":[91],"bias.":[92],"To":[93],"address":[94],"these":[95,175],"limitations,":[96],"we":[97,148,200],"propose":[98],"a":[99,179,206,213],"estimator":[101,169,203],"named":[102],"Off-Policy":[103],"Estimator":[104],"Future":[107],"Value":[108],"(OPFV),":[109],"designed":[110],"accurately":[112],"values":[115],"at":[116],"any":[117],"time":[119],"point.":[120],"The":[121],"key":[122],"feature":[123],"OPFV":[125,195],"its":[127],"ability":[128],"leverage":[130],"useful":[132],"structure":[133],"within":[134],"time-series":[135],"While":[137],"might":[140],"be":[142],"present":[143],"log,":[147],"can":[149],"leverage,":[150],"example,":[152],"seasonal,":[153],"weekly,":[154],"holiday":[156],"effects":[157],"are":[159],"consistent":[160],"both":[162],"Our":[168],"first":[172],"exploit":[174],"time-related":[176],"structures":[177],"via":[178],"new":[180,207],"type":[181],"importance":[183],"weighting,":[184],"enabling":[185],"effective":[186],"F-OPE.":[187],"Theoretical":[188],"analysis":[189],"identifies":[190],"conditions":[192],"under":[193,239],"which":[194],"becomes":[196],"low-bias.":[197],"addition,":[199],"extend":[201],"develop":[205],"policy-gradient":[208],"method":[209],"proactively":[211],"learn":[212],"good":[214],"only":[218],"Empirical":[221],"results":[222],"show":[223],"substantially":[227],"outperform":[228],"existing":[229],"non-stationarity":[240],"various":[242],"experimental":[243],"setups.":[244]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
