{"id":"https://openalex.org/W2000850397","doi":"https://doi.org/10.1109/tac.2013.2292137","title":"Online Markov Decision Processes Under Bandit Feedback","display_name":"Online Markov Decision Processes Under Bandit Feedback","publication_year":2014,"publication_date":"2014-01-31","ids":{"openalex":"https://openalex.org/W2000850397","doi":"https://doi.org/10.1109/tac.2013.2292137","mag":"2000850397"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2013.2292137","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2013.2292137","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-01079422","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077167635","display_name":"Gergely Neu","orcid":"https://orcid.org/0000-0001-6287-3796"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I4210138412","display_name":"Centre de recherche Inria Lille - Nord Europe","ror":"https://ror.org/04eej9726","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210138412"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Gergely Neu","raw_affiliation_strings":["SequeL team, INRIA Lille-Nord Europe, Villeneuve d'Ascq, France","SequeL Team, INRIA Lille - Nord Eur., Villeneuve d'Ascq, France"],"affiliations":[{"raw_affiliation_string":"SequeL team, INRIA Lille-Nord Europe, Villeneuve d'Ascq, France","institution_ids":["https://openalex.org/I4210138412"]},{"raw_affiliation_string":"SequeL Team, INRIA Lille - Nord Eur., Villeneuve d'Ascq, France","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041609088","display_name":"Andr\u00e1s Gy\u00f6rgy","orcid":"https://orcid.org/0000-0003-0586-4337"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Andras Gyorgy","raw_affiliation_strings":["Department of Computing Science, University of Alberta, Edmonton, AB, Canada","Department of Computer Science, University of Alberta, Edmonton, AB, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Computing Science, University of Alberta, Edmonton, AB, Canada","institution_ids":["https://openalex.org/I154425047"]},{"raw_affiliation_string":"Department of Computer Science, University of Alberta, Edmonton, AB, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069856068","display_name":"Csaba Szepesv\u00e1ri","orcid":"https://orcid.org/0000-0002-9286-2892"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Csaba Szepesvari","raw_affiliation_strings":["Department of Computing Science, University of Alberta, Edmonton, AB, Canada","Department of Computer Science, University of Alberta, Edmonton, AB, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Computing Science, University of Alberta, Edmonton, AB, Canada","institution_ids":["https://openalex.org/I154425047"]},{"raw_affiliation_string":"Department of Computer Science, University of Alberta, Edmonton, AB, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042532195","display_name":"Andr\u00e1s Antos","orcid":null},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Andras Antos","raw_affiliation_strings":["Department of Computer Science and Information Theory, Budapest University of Technology and Economics, Budapest, Hungary","Budapest University of Technology and Economics , Budapest , Hungary"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Information Theory, Budapest University of Technology and Economics, Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]},{"raw_affiliation_string":"Budapest University of Technology and Economics , Budapest , Hungary","institution_ids":["https://openalex.org/I29770179"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5077167635"],"corresponding_institution_ids":["https://openalex.org/I1326498283","https://openalex.org/I4210138412"],"apc_list":null,"apc_paid":null,"fwci":5.2756,"has_fulltext":false,"cited_by_count":102,"citation_normalized_percentile":{"value":0.95257541,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"59","issue":"3","first_page":"676","last_page":"691"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.8875672221183777},{"id":"https://openalex.org/keywords/hindsight-bias","display_name":"Hindsight bias","score":0.8015220165252686},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6784470081329346},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5724399089813232},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5576154589653015},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5366529822349548},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.5165892839431763},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.45858168601989746},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.3994613289833069},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.3811473548412323},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.37242788076400757},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.36529284715652466},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.35916393995285034},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35307568311691284},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2796778380870819},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.14674335718154907},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1373489797115326},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.11413407325744629}],"concepts":[{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.8875672221183777},{"id":"https://openalex.org/C10347200","wikidata":"https://www.wikidata.org/wiki/Q1960297","display_name":"Hindsight bias","level":2,"score":0.8015220165252686},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6784470081329346},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.5724399089813232},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5576154589653015},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5366529822349548},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.5165892839431763},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.45858168601989746},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.3994613289833069},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.3811473548412323},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.37242788076400757},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.36529284715652466},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.35916393995285034},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35307568311691284},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2796778380870819},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.14674335718154907},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1373489797115326},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.11413407325744629},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tac.2013.2292137","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2013.2292137","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-01079422v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01079422","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Automatic Control, 2014, 59, pp.676 - 691. &#x27E8;10.1109/TAC.2013.2292137&#x27E9;","raw_type":"Journal articles"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-01079422v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01079422","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Automatic Control, 2014, 59, pp.676 - 691. &#x27E8;10.1109/TAC.2013.2292137&#x27E9;","raw_type":"Journal articles"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.8100000023841858}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W100039866","https://openalex.org/W1570963478","https://openalex.org/W1784402736","https://openalex.org/W1850488217","https://openalex.org/W2005235540","https://openalex.org/W2074680702","https://openalex.org/W2077902449","https://openalex.org/W2109339818","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2122187689","https://openalex.org/W2135829225","https://openalex.org/W2138637686","https://openalex.org/W2153068585","https://openalex.org/W2156211713","https://openalex.org/W2157016390","https://openalex.org/W2159035200","https://openalex.org/W2160354932","https://openalex.org/W2162926979","https://openalex.org/W2241126168","https://openalex.org/W2334782222","https://openalex.org/W2484957131","https://openalex.org/W2595697910","https://openalex.org/W2596585349","https://openalex.org/W2604272474","https://openalex.org/W2611866857","https://openalex.org/W2911283634","https://openalex.org/W3020125231","https://openalex.org/W3084669832","https://openalex.org/W4214717370","https://openalex.org/W6604079820","https://openalex.org/W6639039794","https://openalex.org/W6650205757","https://openalex.org/W6690381995"],"related_works":["https://openalex.org/W2998446700","https://openalex.org/W2970347269","https://openalex.org/W4287863949","https://openalex.org/W4287102143","https://openalex.org/W1850488217","https://openalex.org/W2945119207","https://openalex.org/W3182614517","https://openalex.org/W2379651310","https://openalex.org/W2113019827","https://openalex.org/W1541249122"],"abstract_inverted_index":{"We":[0],"consider":[1],"online":[2],"learning":[3,28],"in":[4,10,38,40,48],"finite":[5],"stochastic":[6],"Markovian":[7],"environments":[8],"where":[9],"each":[11,49],"time":[12,50,125],"step":[13,51],"a":[14,135],"new":[15],"reward":[16,45,60],"function":[17],"is":[18,30,81,97,140],"chosen":[19],"by":[20],"an":[21,98,101],"oblivious":[22],"adversary.":[23],"The":[24,79,88],"goal":[25],"of":[26,42,90,104,130],"the":[27,34,43,52,55,59,63,67,72,85,91,127,147,156],"agent":[29,53,68,80],"to":[31,83],"compete":[32],"with":[33,62,75,100],"best":[35],"stationary":[36,115],"policy":[37],"hindsight":[39],"terms":[41],"total":[44],"received.":[46],"Specifically,":[47],"observes":[54],"current":[56],"state":[57,89],"and":[58],"associated":[61,74],"last":[64],"transition,":[65],"however,":[66],"does":[69],"not":[70],"observe":[71],"rewards":[73],"other":[76],"state-action":[77],"pairs.":[78],"assumed":[82],"know":[84],"transition":[86],"probabilities.":[87],"art":[92],"result":[93],"for":[94,155],"this":[95,111,131],"setting":[96],"algorithm":[99,132],"expected":[102,128],"regret":[103,129,153],"O(T":[105,141],"<sup":[106,142],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[107,143],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2/3</sup>":[108],"lnT).":[109],"In":[110],"paper,":[112],"assuming":[113],"that":[114,122],"policies":[116],"mix":[117],"uniformly":[118],"fast,":[119],"we":[120],"show":[121],"after":[123],"T":[124],"steps,":[126],"(more":[133],"precisely,":[134],"slightly":[136],"modified":[137],"version":[138],"thereof)":[139],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1/2</sup>":[144],"lnT),":[145],"giving":[146],"first":[148],"rigorously":[149],"proven,":[150],"essentially":[151],"tight":[152],"bound":[154],"problem.":[157]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":57},{"year":2019,"cited_by_count":10},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
