{"id":"https://openalex.org/W4310895664","doi":"https://doi.org/10.1109/itw54588.2022.9965820","title":"Regret Analysis for RL using Renewal Bandit Feedback","display_name":"Regret Analysis for RL using Renewal Bandit Feedback","publication_year":2022,"publication_date":"2022-11-01","ids":{"openalex":"https://openalex.org/W4310895664","doi":"https://doi.org/10.1109/itw54588.2022.9965820"},"language":"en","primary_location":{"id":"doi:10.1109/itw54588.2022.9965820","is_oa":false,"landing_page_url":"https://doi.org/10.1109/itw54588.2022.9965820","pdf_url":null,"source":{"id":"https://openalex.org/S4363606450","display_name":"2022 IEEE Information Theory Workshop (ITW)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Information Theory Workshop (ITW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083616351","display_name":"Sujay Bhatt","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sujay Bhatt","raw_affiliation_strings":["Baidu Research,Cognitive Computing Lab,USA,WA 98004"],"affiliations":[{"raw_affiliation_string":"Baidu Research,Cognitive Computing Lab,USA,WA 98004","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015142209","display_name":"Guanhua Fang","orcid":"https://orcid.org/0000-0002-4937-2601"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guanhua Fang","raw_affiliation_strings":["Baidu Research,Cognitive Computing Lab,USA,WA 
98004"],"affiliations":[{"raw_affiliation_string":"Baidu Research,Cognitive Computing Lab,USA,WA 98004","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100435527","display_name":"Ping Li","orcid":"https://orcid.org/0000-0002-5979-8868"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ping Li","raw_affiliation_strings":["Baidu Research,Cognitive Computing Lab,USA,WA 98004"],"affiliations":[{"raw_affiliation_string":"Baidu Research,Cognitive Computing Lab,USA,WA 98004","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051592584","display_name":"Gennady Samorodnitsky","orcid":"https://orcid.org/0000-0001-9947-2574"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gennady Samorodnitsky","raw_affiliation_strings":["Cornell University,School of ORIE,Ithaca,USA,NY 14853"],"affiliations":[{"raw_affiliation_string":"Cornell University,School of ORIE,Ithaca,USA,NY 14853","institution_ids":["https://openalex.org/I205783295"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5083616351"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20567756,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"19","issue":null,"first_page":"137","last_page":"142"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms 
Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.9597100019454956},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.8331402540206909},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7042465209960938},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective 
(graphical)","score":0.6230774521827698},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6130838394165039},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5490619540214539},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4965234398841858},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4600837230682373},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.4369010031223297},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.388852596282959},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3286899924278259},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2068178951740265}],"concepts":[{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.9597100019454956},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.8331402540206909},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7042465209960938},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.6230774521827698},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6130838394165039},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov 
chain","level":2,"score":0.5490619540214539},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4965234398841858},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4600837230682373},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4369010031223297},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.388852596282959},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3286899924278259},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2068178951740265},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/itw54588.2022.9965820","is_oa":false,"landing_page_url":"https://doi.org/10.1109/itw54588.2022.9965820","pdf_url":null,"source":{"id":"https://openalex.org/S4363606450","display_name":"2022 IEEE Information Theory Workshop 
(ITW)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Information Theory Workshop (ITW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.8100000023841858,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1531218190","https://openalex.org/W1662803991","https://openalex.org/W1771410628","https://openalex.org/W1988526405","https://openalex.org/W2000850397","https://openalex.org/W2009551863","https://openalex.org/W2071983464","https://openalex.org/W2074680702","https://openalex.org/W2097931172","https://openalex.org/W2119567691","https://openalex.org/W2736601468","https://openalex.org/W2750990725","https://openalex.org/W2753754162","https://openalex.org/W2766447205","https://openalex.org/W2920362155","https://openalex.org/W2950290000","https://openalex.org/W2964043796","https://openalex.org/W3017595567","https://openalex.org/W3020125231","https://openalex.org/W3035397247","https://openalex.org/W3037396296","https://openalex.org/W3040227767","https://openalex.org/W3043199428","https://openalex.org/W3130181154","https://openalex.org/W4205199304","https://openalex.org/W4206530644","https://openalex.org/W4255052368","https://openalex.org/W4287725108","https://openalex.org/W4294183581","https://openalex.org/W4298857966","https://openalex.org/W4299370734","https://openalex.org/W6637077600","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6639039
794","https://openalex.org/W6675999342","https://openalex.org/W6692846177","https://openalex.org/W6736209634","https://openalex.org/W6741002519","https://openalex.org/W6743617358","https://openalex.org/W6743825324","https://openalex.org/W6760405395","https://openalex.org/W6769500229","https://openalex.org/W6776719514","https://openalex.org/W6779722832","https://openalex.org/W6780275439","https://openalex.org/W6780493559","https://openalex.org/W6780512680","https://openalex.org/W6791291558"],"related_works":["https://openalex.org/W2970347269","https://openalex.org/W4287863949","https://openalex.org/W1850488217","https://openalex.org/W4287102143","https://openalex.org/W2945119207","https://openalex.org/W3182614517","https://openalex.org/W2379651310","https://openalex.org/W2113019827","https://openalex.org/W1541249122","https://openalex.org/W2413828414"],"abstract_inverted_index":{"Learning":[0],"in":[1,29],"a":[2,9,23,44,51,64,77],"Markov":[3,45],"Decision":[4],"Process":[5],"(MDP)":[6],"framework":[7],"is":[8,97],"fundamental":[10],"challenge":[11],"for":[12,72,105],"sequential":[13],"decision":[14],"making":[15],"under":[16],"uncertainty.":[17],"In":[18,36],"this":[19,59,82],"paper,":[20],"we":[21,38,62],"present":[22],"new":[24],"perspective":[25],"on":[26,81],"model-based":[27],"learning":[28],"MDPs":[30],"using":[31],"ideas":[32],"from":[33],"renewal":[34,52],"theory.":[35],"particular,":[37],"reformulate":[39],"the":[40,86,94],"problem":[41],"of":[42,49,93],"controlling":[43,50],"chain":[46],"to":[47,99],"one":[48],"reward":[53],"process":[54],"with":[55,85],"bandit":[56],"feedback.":[57],"For":[58],"reformulated":[60],"problem,":[61],"provide":[63,76],"regret":[65,87],"decomposition":[66],"that":[67],"informs":[68],"novel":[69],"algorithm":[70,79,96],"design":[71],"MDPs.":[73,107],"We":[74],"further":[75],"naive":[78],"based":[80],"reformulation":[83],"along":[84],"analysis.":[88],"A":[89],"simple":[90],"greedy":[91],"variant":[92],"proposed":[95],"shown"
:[98],"empirically":[100],"outperform":[101],"popular":[102],"value-based":[103],"methods":[104],"finite":[106]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
