{"id":"https://openalex.org/W4388667106","doi":"https://doi.org/10.1109/allerton58177.2023.10313427","title":"Best Policy Identification in Linear MDPs","display_name":"Best Policy Identification in Linear MDPs","publication_year":2023,"publication_date":"2023-09-26","ids":{"openalex":"https://openalex.org/W4388667106","doi":"https://doi.org/10.1109/allerton58177.2023.10313427"},"language":"en","primary_location":{"id":"doi:10.1109/allerton58177.2023.10313427","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/allerton58177.2023.10313427","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 59th Annual Allerton Conference on Communication, Control, and Computing (Allerton)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016233534","display_name":"J\u00e9r\u00f4me Taupin","orcid":null},"institutions":[{"id":"https://openalex.org/I29607241","display_name":"\u00c9cole Normale Sup\u00e9rieure - PSL","ror":"https://ror.org/05a0dhs15","country_code":"FR","type":"other","lineage":["https://openalex.org/I2746051580","https://openalex.org/I29607241"]},{"id":"https://openalex.org/I4210149507","display_name":"\u00c9cole des Neurosciences de Paris","ror":"https://ror.org/05re0sm29","country_code":"FR","type":"other","lineage":["https://openalex.org/I39804081","https://openalex.org/I4210149507"]},{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["FR","SE"],"is_corresponding":false,"raw_author_name":"J\u00e9r\u00f4me Taupin","raw_affiliation_strings":["KTH Royal Institute of Technology,Stockholm,Sweden","ENS, Paris, France","KTH Royal Institute of Technology, Stockholm, Sweden"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Stockholm,Sweden","institution_ids":["https://openalex.org/I86987016"]},{"raw_affiliation_string":"ENS, Paris, France","institution_ids":["https://openalex.org/I4210149507","https://openalex.org/I29607241"]},{"raw_affiliation_string":"KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082148015","display_name":"Yassir Jedra","orcid":"https://orcid.org/0000-0002-4403-1066"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Yassir Jedra","raw_affiliation_strings":["KTH Royal Institute of Technology,Stockholm,Sweden","KTH Royal Institute of Technology, Stockholm, Sweden"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Stockholm,Sweden","institution_ids":["https://openalex.org/I86987016"]},{"raw_affiliation_string":"KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025136069","display_name":"Alexandre Prouti\u00e8re","orcid":"https://orcid.org/0000-0002-4679-4673"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Alexandre Proutiere","raw_affiliation_strings":["KTH Royal Institute of Technology,Stockholm,Sweden","KTH Royal Institute of Technology, Stockholm, Sweden"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Stockholm,Sweden","institution_ids":["https://openalex.org/I86987016"]},{"raw_affiliation_string":"KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.236,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60404975,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.7350798845291138},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.6642519235610962},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.6400579810142517},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.6278451681137085},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.5854187607765198},{"id":"https://openalex.org/keywords/minimax","display_name":"Minimax","score":0.5290804505348206},{"id":"https://openalex.org/keywords/ergodic-theory","display_name":"Ergodic theory","score":0.5226364135742188},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.4821767210960388},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.2649129033088684},{"id":"https://openalex.org/keywords/pure-mathematics","display_name":"Pure mathematics","score":0.12057596445083618},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.10106441378593445}],"concepts":[{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.7350798845291138},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.6642519235610962},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6400579810142517},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.6278451681137085},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.5854187607765198},{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.5290804505348206},{"id":"https://openalex.org/C122044880","wikidata":"https://www.wikidata.org/wiki/Q5498822","display_name":"Ergodic theory","level":2,"score":0.5226364135742188},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.4821767210960388},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2649129033088684},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.12057596445083618},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.10106441378593445}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/allerton58177.2023.10313427","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/allerton58177.2023.10313427","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 59th Annual Allerton Conference on Communication, Control, and Computing (Allerton)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.7900000214576721}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W107583932","https://openalex.org/W1516418405","https://openalex.org/W1915973093","https://openalex.org/W2106663919","https://openalex.org/W2120678009","https://openalex.org/W2122701159","https://openalex.org/W2959895084","https://openalex.org/W2971165405","https://openalex.org/W3008801451","https://openalex.org/W3038036515","https://openalex.org/W3091279148","https://openalex.org/W3092750504","https://openalex.org/W3102674085","https://openalex.org/W3148530143","https://openalex.org/W3167099912","https://openalex.org/W4206530644","https://openalex.org/W4212774754","https://openalex.org/W4221160937","https://openalex.org/W4320487853","https://openalex.org/W4400639671","https://openalex.org/W6676362848","https://openalex.org/W6678494045","https://openalex.org/W6681960134","https://openalex.org/W6754906582","https://openalex.org/W6763257922","https://openalex.org/W6767301823","https://openalex.org/W6769434015","https://openalex.org/W6776530296","https://openalex.org/W6779806053","https://openalex.org/W6780209122","https://openalex.org/W6784470123","https://openalex.org/W6784748513","https://openalex.org/W6786120962","https://openalex.org/W6809755281","https://openalex.org/W6841426164","https://openalex.org/W6849764524"],"related_works":["https://openalex.org/W50969306","https://openalex.org/W2390585021","https://openalex.org/W412641959","https://openalex.org/W2341040961","https://openalex.org/W2779828239","https://openalex.org/W3154976382","https://openalex.org/W4292101436","https://openalex.org/W2044004505","https://openalex.org/W4287207389","https://openalex.org/W1542540349"],"abstract_inverted_index":{"We":[0,25,81],"consider":[1],"the":[2,15,32,51,56,69,86,122,132,135,143,159,182,192],"problem":[3],"of":[4,35,58,126,134,195,238],"best":[5],"policy":[6,42],"identification":[7],"in":[8,14,142],"discounted":[9],"Linear":[10],"Markov":[11],"Decision":[12],"Processes":[13],"fixed":[16],"confidence":[17],"setting,":[18],"under":[19,166],"both":[20],"generative":[21,87],"and":[22,75,79,129,150,154,178],"forward":[23,160],"models.":[24],"derive":[26,244],"an":[27,40,59,234,239],"instance-specific":[28,235],"lower":[29,48,156],"bound":[30,49,140],"on":[31,256],"expected":[33],"number":[34],"samples":[36],"required":[37],"to":[38,72,184],"identify":[39],"\u03b5-optimal":[41],"with":[43],"probability":[44],"1":[45],"\u2013\u03b4.":[46],"The":[47],"characterizes":[50],"optimal":[52,170,240],"sampling":[53,77],"rule":[54],"as":[55,68],"solution":[57],"intricate":[60],"non-convex":[61],"optimization":[62],"program,":[63],"but":[64],"can":[65],"be":[66,185],"used":[67],"starting":[70],"point":[71],"devise":[73,82],"simple":[74],"near-optimal":[76],"rules":[78],"algorithms.":[80,84],"such":[83],"In":[85,158],"model,":[88,161],"our":[89,196],"algorithm":[90,197],"exhibits":[91],"a":[92,164],"sample":[93,193],"complexity":[94,194],"upper":[95,139,204],"bounded":[96,205],"by":[97,206],"$\\mathcal{O}\\left(":[98,207],"{\\left(":[99,208],"{d{{(1":[100],"-":[101,104,213,216],"\\gamma":[102,214],")}^{":[103,215],"4}}/(\\varepsilon":[105],"+":[106,116,218],"}":[107],"\\right.}":[108],"\\right.\\left.":[109],"{\\left.":[110],"{\\Delta":[111],"{)^2}}":[112],"\\right)(\\log":[113],"(1/\\delta":[114],")":[115],"d)}":[117],"\\right)$":[118,228],"where":[119,229],"\u0394":[120],"denotes":[121],"minimum":[123],"reward":[124],"gap":[125],"sub-optimal":[127],"actions":[128],"d":[130],"is":[131,172,176,198,233],"dimension":[133],"feature":[136],"space.":[137],"This":[138],"holds":[141],"moderate-confidence":[144],"regime":[145],"(i.e.,":[146],"for":[147,252],"all":[148],"\u03b4),":[149],"matches":[151],"existing":[152],"minimax":[153],"gap-dependent":[155],"bounds.":[157],"we":[162,247],"determine":[163],"condition":[165,175],"which":[167],"learning":[168],"approximately":[169],"policies":[171],"possible;":[173],"this":[174,190,245],"weak":[177],"does":[179],"not":[180],"require":[181],"MDP":[183],"ergodic":[186],"nor":[187],"communicating.":[188],"Under":[189],"condition,":[191],"asymptotically":[199],"(as":[200],"\u03b4":[201],"approaches":[202],"0)":[203],"{{\\sigma":[209],"^":[210],"\\star":[211],"}{{(1":[212],"4}}/{{(\\varepsilon":[217],"\\Delta":[219],")}^2}}":[220],"\\right)\\left(":[221],"{\\log":[222],"\\left(":[223],"{\\frac{1}{\\delta":[224],"}}":[225],"\\right)}":[226,227],"\u03c3<sup":[230],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[231],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">\u22c6</sup>":[232],"constant,":[236],"value":[237],"experiment-design":[241],"problem.":[242],"To":[243],"bound,":[246],"establish":[248],"novel":[249],"concentration":[250],"results":[251],"random":[253],"matrices":[254],"built":[255],"Markovian":[257],"data.":[258]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
