{"id":"https://openalex.org/W2073107347","doi":"https://doi.org/10.1561/2200000038","title":"From Bandits to Monte-Carlo Tree Search: The Optimistic Principle Applied to Optimization and Planning","display_name":"From Bandits to Monte-Carlo Tree Search: The Optimistic Principle Applied to Optimization and Planning","publication_year":2014,"publication_date":"2014-01-20","ids":{"openalex":"https://openalex.org/W2073107347","doi":"https://doi.org/10.1561/2200000038","mag":"2073107347"},"language":"en","primary_location":{"id":"doi:10.1561/2200000038","is_oa":false,"landing_page_url":"https://doi.org/10.1561/2200000038","pdf_url":null,"source":{"id":"https://openalex.org/S4210188176","display_name":"Foundations and Trends\u00ae in Machine Learning","issn_l":"1935-8237","issn":["1935-8237","1935-8245"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318575","host_organization_name":"Now Publishers","host_organization_lineage":["https://openalex.org/P4310318575"],"host_organization_lineage_names":["Now Publishers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Foundations and Trends\u00ae in Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006533777","display_name":"R\u00e9mi Munos","orcid":null},"institutions":[{"id":"https://openalex.org/I4210138412","display_name":"Centre Inria de l'Universit\u00e9 de Lille","ror":"https://ror.org/04eej9726","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1326498283","https://openalex.org/I4210138412"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"R\u00e9mi Munos","raw_affiliation_strings":["INRIA Lille \u2013 Nord Europe","Sequential Learning"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"INRIA Lille \u2013 Nord Europe","institution_ids":["https://openalex.org/I4210138412"]},{"raw_affiliation_string":"Sequential Learning","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5006533777"],"corresponding_institution_ids":["https://openalex.org/I4210138412"],"apc_list":null,"apc_paid":null,"fwci":32.7645,"has_fulltext":false,"cited_by_count":249,"citation_normalized_percentile":{"value":0.99846713,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"7","issue":"1","first_page":"1","last_page":"129"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monte-carlo-tree-search","display_name":"Monte Carlo tree search","score":0.727156400680542},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5884002447128296},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5749117732048035},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4661617875099182},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.4654707610607147},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.41108548641204834},{"id":"https://openalex.org/keywords/stochastic-optimization","display_name":"Stochastic optimization","score":0.41049525141716003},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.3741397559642792},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3358686566352844},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.2284303605556488}],"concepts":[{"id":"https://openalex.org/C46149586","wikidata":"https://www.wikidata.org/wiki/Q11785332","display_name":"Monte Carlo tree search","level":3,"score":0.727156400680542},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5884002447128296},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5749117732048035},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4661617875099182},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.4654707610607147},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.41108548641204834},{"id":"https://openalex.org/C194387892","wikidata":"https://www.wikidata.org/wiki/Q1747770","display_name":"Stochastic optimization","level":2,"score":0.41049525141716003},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.3741397559642792},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3358686566352844},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.2284303605556488},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1561/2200000038","is_oa":false,"landing_page_url":"https://doi.org/10.1561/2200000038","pdf_url":null,"source":{"id":"https://openalex.org/S4210188176","display_name":"Foundations and Trends\u00ae in Machine Learning","issn_l":"1935-8237","issn":["1935-8237","1935-8245"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318575","host_organization_name":"Now Publishers","host_organization_lineage":["https://openalex.org/P4310318575"],"host_organization_lineage_names":["Now Publishers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Foundations and Trends\u00ae in Machine Learning","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":121,"referenced_works":["https://openalex.org/W32104792","https://openalex.org/W50486269","https://openalex.org/W57706852","https://openalex.org/W153346180","https://openalex.org/W368265031","https://openalex.org/W638603679","https://openalex.org/W1483874187","https://openalex.org/W1495658734","https://openalex.org/W1497039698","https://openalex.org/W1499399028","https://openalex.org/W1505731132","https://openalex.org/W1510812122","https://openalex.org/W1512919909","https://openalex.org/W1521084402","https://openalex.org/W1523008036","https://openalex.org/W1524244278","https://openalex.org/W1527244075","https://openalex.org/W1540821927","https://openalex.org/W1551466210","https://openalex.org/W1553290137","https://openalex.org/W1556834409","https://openalex.org/W1561941139","https://openalex.org/W1568143599","https://openalex.org/W1568674531","https://openalex.org/W1570963478","https://openalex.org/W1576452626","https://openalex.org/W1589374853","https://openalex.org/W1605768620","https://openalex.org/W1625390266","https://openalex.org/W1626155273","https://openalex.org/W1668488459","https://openalex.org/W1699297496","https://openalex.org/W1714211023","https://openalex.org/W1857391317","https://openalex.org/W1881419322","https://openalex.org/W1888434271","https://openalex.org/W1911551976","https://openalex.org/W1970837196","https://openalex.org/W1973712811","https://openalex.org/W1983962754","https://openalex.org/W1997840820","https://openalex.org/W1998498767","https://openalex.org/W1999678910","https://openalex.org/W2000080679","https://openalex.org/W2004001705","https://openalex.org/W2009551863","https://openalex.org/W2010189695","https://openalex.org/W2020135152","https://openalex.org/W2033016725","https://openalex.org/W2039522160","https://openalex.org/W2049934117","https://openalex.org/W2052664405","https://openalex.org/W2061753713","https://openalex.org/W2071814471","https://openalex.org/W2073384958","https://openalex.org/W2077148472","https://openalex.org/W2077902449","https://openalex.org/W2096678000","https://openalex.org/W2096840748","https://openalex.org/W2097487180","https://openalex.org/W2100857832","https://openalex.org/W2103581319","https://openalex.org/W2105066050","https://openalex.org/W2107549951","https://openalex.org/W2108114251","https://openalex.org/W2110632090","https://openalex.org/W2110962519","https://openalex.org/W2113741278","https://openalex.org/W2114091522","https://openalex.org/W2115519224","https://openalex.org/W2117667273","https://openalex.org/W2119567691","https://openalex.org/W2119738618","https://openalex.org/W2120090487","https://openalex.org/W2121863487","https://openalex.org/W2126316555","https://openalex.org/W2129480103","https://openalex.org/W2131958277","https://openalex.org/W2137224975","https://openalex.org/W2137677949","https://openalex.org/W2141645258","https://openalex.org/W2142971854","https://openalex.org/W2144913588","https://openalex.org/W2150490089","https://openalex.org/W2152475379","https://openalex.org/W2155625359","https://openalex.org/W2156361796","https://openalex.org/W2157477959","https://openalex.org/W2158319693","https://openalex.org/W2158642177","https://openalex.org/W2158858912","https://openalex.org/W2159103845","https://openalex.org/W2160163723","https://openalex.org/W2168359464","https://openalex.org/W2168405694","https://openalex.org/W2168839459","https://openalex.org/W2169401877","https://openalex.org/W2169511307","https://openalex.org/W2171084228","https://openalex.org/W2182000050","https://openalex.org/W2183455062","https://openalex.org/W2189563888","https://openalex.org/W2206616202","https://openalex.org/W2312609093","https://openalex.org/W2317700292","https://openalex.org/W2332257675","https://openalex.org/W2339749518","https://openalex.org/W2401264332","https://openalex.org/W2569481888","https://openalex.org/W2611243847","https://openalex.org/W2737668828","https://openalex.org/W2794995252","https://openalex.org/W2950929549","https://openalex.org/W2951665052","https://openalex.org/W2963750583","https://openalex.org/W3100329718","https://openalex.org/W3102923851","https://openalex.org/W3125634603","https://openalex.org/W3141595720","https://openalex.org/W3146520007","https://openalex.org/W3148421669"],"related_works":["https://openalex.org/W1978745259","https://openalex.org/W3127438322","https://openalex.org/W4287330080","https://openalex.org/W1990112656","https://openalex.org/W2363855676","https://openalex.org/W2152231846","https://openalex.org/W195464835","https://openalex.org/W193468460","https://openalex.org/W2203568081","https://openalex.org/W1991890515"],"abstract_inverted_index":{"This":[0,147,438],"work":[1],"covers":[2],"several":[3,408],"aspects":[4],"of":[5,11,37,70,73,80,122,128,166,177,185,217,222,229,232,239,242,251,257,291,294,303,311,321,324,335,352,428,466],"the":[6,9,28,34,38,68,74,78,81,129,153,160,164,174,179,186,193,202,214,218,223,230,233,240,249,281,292,295,304,308,312,322,336,353,391,401,415,426,463,499,502,511,517,553],"optimism":[7],"in":[8,45,114,144,159,227,319,367,396,457,540,556],"face":[10],"uncertainty":[12],"principle":[13],"applied":[14,420,435],"to":[15,51,65,67,102,201,248,381,390,421,436,480,489],"large":[16,116],"scale":[17],"optimization":[18,58,83,112,253,346,446,454],"problems":[19,84],"under":[20],"finite":[21],"numerical":[22],"budget.":[23],"The":[24,92,220,301,448],"initial":[25],"motivation":[26,440],"for":[27,141,262,441,475,547],"research":[29],"reported":[30],"here":[31,96],"originated":[32],"from":[33],"empirical":[35],"success":[36],"so-called":[39,148],"Monte-Carlo":[40,416],"Tree":[41,417],"Search":[42,418],"method":[43,224,419],"popularized":[44],"Computer":[46,422],"Go":[47,423],"and":[48,59,85,188,287,399,407,424,460,472,494,506,524],"further":[49],"extended":[50],"many":[52],"other":[53],"games":[54],"as":[55,57,110,236,267,372,374,432,532,534],"well":[56,373,533],"planning":[60,554],"problems.":[61],"Our":[62,245],"objective":[63],"is":[64,97,100,131,162,225,254,297,338,359,377,470,504,521,537],"contribute":[66],"development":[69],"theoretical":[71],"foundations":[72],"field":[75,250],"by":[76,156,181,510],"characterizing":[77],"complexity":[79],"underlying":[82],"designing":[86,442],"efficient":[87,345],"algorithms":[88,260,305,347,430,473],"with":[89,274,328,386,487,560],"performance":[90,221,302],"guarantees.":[91],"main":[93,246,449],"idea":[94],"presented":[95,493],"that":[98,529],"it":[99,376,520,536],"possible":[101],"decompose":[103],"a":[104,115,120,134,157,170,182,210,237,255,387,467,477,490,548],"complex":[105],"decision":[106,127,142,558],"making":[107,143],"problem":[108,113,395,555],"(such":[109,266],"an":[111,526],"search":[117,212,264],"space)":[118],"into":[119],"sequence":[121,130],"elementary":[123],"decisions,":[124],"where":[125,462],"each":[126],"solved":[132],"using":[133],"(stochastic)":[135],"multi-armed":[136,393],"bandit":[137,150,158,168,394],"(simple":[138],"mathematical":[139],"model":[140],"stochastic":[145,392],"environments).":[146],"hierarchical":[149,258,452],"approach":[151],"(where":[152],"reward":[154],"observed":[155,204],"hierarchy":[161],"itself":[163],"return":[165],"another":[167],"at":[169,197],"deeper":[171],"level)":[172],"possesses":[173],"nice":[175],"feature":[176],"starting":[178],"exploration":[180],"quasi-uniform":[183],"sampling":[184],"space":[187,354,469],"then":[189,340],"focusing":[190],"progressively":[191],"on":[192,279,307,451],"most":[194],"promising":[195],"area,":[196],"different":[198,275],"scales,":[199],"according":[200],"evaluations":[203,282],"so":[205],"far,":[206],"until":[207],"eventually":[208],"performing":[209],"local":[211,333],"around":[213,314],"global":[215,316],"optima":[216,317],"function.":[219],"assessed":[226],"terms":[228,320],"optimality":[231],"returned":[234],"solution":[235],"function":[238,243,252,296,313,337,478],"number":[241],"evaluations.":[244],"contribution":[247],"class":[256],"optimistic":[259,445,453,545],"designed":[261,474],"general":[263,464],"spaces":[265],"metric":[268],"spaces,":[269],"trees,":[270],"graphs,":[271],"Euclidean":[272],"spaces)":[273],"algorithmic":[276],"instantiations":[277],"depending":[278],"whether":[280,288],"are":[283,455,492],"noisy":[284],"or":[285,299],"noiseless":[286],"some":[289,329,368],"measure":[290],"\u201csmoothness\u201d":[293],"known":[298,339,505,523],"unknown.":[300],"depends":[306],"\u201clocal\u201d":[309],"behavior":[310],"its":[315,485],"expressed":[318],"quantity":[323],"near-optimal":[325],"states":[326],"measured":[327],"metric.":[330],"If":[331],"this":[332,357],"smoothness":[334],"one":[341,361],"can":[342,362,507],"design":[343],"very":[344],"(with":[348],"convergence":[349],"rate":[350],"independent":[351],"dimension).":[355],"When":[356],"information":[358],"unknown,":[360],"build":[363],"adaptive":[364,527],"techniques":[365],"which,":[366],"cases,":[369],"perform":[370],"almost":[371,531],"when":[375,501,519,535],"known.":[378,538],"In":[379,410],"order":[380],"be":[382,481,508],"self-contained,":[383],"we":[384,413,543],"start":[385],"brief":[388],"introduction":[389],"Chapter":[397,411,496,514,541],"1":[398],"describe":[400,544],"UCB":[402],"(Upper":[403],"Confidence":[404],"Bound)":[405],"strategy":[406],"extensions.":[409],"2":[412],"present":[414],"show":[425],"limitations":[427],"previous":[429],"such":[431],"UCT":[433],"(UCB":[434],"Trees).":[437],"provides":[439],"theoretically":[443],"well-founded":[444],"algorithms.":[447],"contributions":[450],"described":[456],"Chapters":[458],"3":[459,497],"4":[461,515],"setting":[465],"semi-metric":[468,491,503],"introduced":[471],"optimizing":[476],"assumed":[479],"locally":[482],"smooth":[483],"(around":[484],"maxima)":[486],"respect":[488],"analyzed.":[495],"considers":[498,516],"case":[500,518],"used":[509],"algorithm,":[512],"whereas":[513],"not":[522],"describes":[525],"technique":[528],"does":[530],"Finally":[539],"5":[542],"strategies":[546],"specific":[549],"structured":[550],"problem,":[551],"namely":[552],"Markov":[557],"processes":[559],"infinite":[561],"horizon":[562],"discounted":[563],"rewards.":[564]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":20},{"year":2021,"cited_by_count":34},{"year":2020,"cited_by_count":23},{"year":2019,"cited_by_count":32},{"year":2018,"cited_by_count":23},{"year":2017,"cited_by_count":23},{"year":2016,"cited_by_count":28},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":15},{"year":2013,"cited_by_count":3}],"updated_date":"2026-06-06T09:05:17.133730","created_date":"2025-10-10T00:00:00"}
