{"id":"https://openalex.org/W2962764550","doi":"https://doi.org/10.1109/icassp.2011.5946273","title":"The non-Bayesian restless multi-armed bandit: A case of near-logarithmic regret","display_name":"The non-Bayesian restless multi-armed bandit: A case of near-logarithmic regret","publication_year":2011,"publication_date":"2011-05-01","ids":{"openalex":"https://openalex.org/W2962764550","doi":"https://doi.org/10.1109/icassp.2011.5946273","mag":"2962764550"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2011.5946273","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2011.5946273","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102990156","display_name":"W. H. Dai","orcid":"https://orcid.org/0000-0003-1732-7985"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenhan Dai","raw_affiliation_strings":["School of Information Science and Technology, Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011040174","display_name":"Yi Gai","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yi Gai","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, University of Southern California, USA"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, University of Southern California, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063784062","display_name":"Bhaskar Krishnamachari","orcid":"https://orcid.org/0000-0002-9994-9931"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bhaskar Krishnamachari","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, University of Southern California, USA"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, University of Southern California, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013788255","display_name":"Qing Zhao","orcid":"https://orcid.org/0000-0002-9590-4285"},"institutions":[{"id":"https://openalex.org/I84218800","display_name":"University of California, Davis","ror":"https://ror.org/05rrcem69","country_code":"US","type":"education","lineage":["https://openalex.org/I84218800"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qing Zhao","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California, Davis, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, Davis, USA","institution_ids":["https://openalex.org/I84218800"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102990156"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":13.9661,"has_fulltext":false,"cited_by_count":52,"citation_normalized_percentile":{"value":0.9909097,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"47","issue":null,"first_page":"2940","last_page":"2943"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.9131783246994019},{"id":"https://openalex.org/keywords/multi-armed-bandit","display_name":"Multi-armed bandit","score":0.6980575323104858},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.6715837121009827},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6135777831077576},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6107702255249023},{"id":"https://openalex.org/keywords/logarithm","display_name":"Logarithm","score":0.5855849385261536},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5411469340324402},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5226435661315918},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.46668702363967896},{"id":"https://openalex.org/keywords/thompson-sampling","display_name":"Thompson sampling","score":0.42490154504776},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.34426045417785645},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3188678026199341},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.29675737023353577},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2291557788848877},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.093852698802948}],"concepts":[{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.9131783246994019},{"id":"https://openalex.org/C123197309","wikidata":"https://www.wikidata.org/wiki/Q2882343","display_name":"Multi-armed bandit","level":3,"score":0.6980575323104858},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.6715837121009827},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6135777831077576},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6107702255249023},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.5855849385261536},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.5411469340324402},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5226435661315918},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.46668702363967896},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.42490154504776},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.34426045417785645},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3188678026199341},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29675737023353577},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2291557788848877},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.093852698802948},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2011.5946273","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2011.5946273","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2003798390","https://openalex.org/W2009551863","https://openalex.org/W2029199203","https://openalex.org/W2056921512","https://openalex.org/W2105556121","https://openalex.org/W2120125372","https://openalex.org/W2141515329","https://openalex.org/W2148250692","https://openalex.org/W2168405694","https://openalex.org/W2171671264","https://openalex.org/W2951678875"],"related_works":["https://openalex.org/W2947263763","https://openalex.org/W4288373430","https://openalex.org/W3036260055","https://openalex.org/W4287753704","https://openalex.org/W2970347269","https://openalex.org/W3158925485","https://openalex.org/W2996735312","https://openalex.org/W4287863949","https://openalex.org/W2964125852","https://openalex.org/W1850547517"],"abstract_inverted_index":{"In":[0,129],"the":[1,44,70,77,80,102,107,113,117,136,140,213,222,231,236],"classic":[2],"Bayesian":[3,104,241],"restless":[4],"multi-armed":[5,163],"bandit":[6,164],"(RMAB)":[7],"problem,":[8],"there":[9],"are":[10,83],"N":[11],"arms,":[12],"with":[13,25],"rewards":[14],"on":[15,112],"all":[16],"arms":[17,36],"evolving":[18],"at":[19,37],"each":[20,38,150],"time":[21,39],"as":[22,156],"Markov":[23,81],"chains":[24],"known":[26,59,114,227],"parameters.":[27],"A":[28],"player":[29],"seeks":[30],"to":[31,42,60,85,95,134,206,212],"activate":[32],"K":[33],"\u2265":[34],"1":[35],"in":[40,63,67,75,159,202,235],"order":[41],"maximize":[43],"expected":[45,203],"total":[46],"reward":[47,204,216],"obtained":[48],"over":[49,188],"multiple":[50],"plays.":[51],"RMAB":[52,142],"is":[53,58,99,120,172,230],"a":[54,88,123,145,160,168,180,207,226,239],"challenging":[55],"problem":[56,97,105,165],"that":[57,98,194,217],"be":[61,86,219],"PSPACE-hard":[62],"general.":[64],"We":[65,90,174,192],"consider":[66],"this":[68,96,153,176],"work":[69],"even":[71],"harder":[72],"non-Bayesian":[73,141,162],"RMAB,":[74],"which":[76,148,167,210],"parameters":[78],"of":[79,122,127],"chain":[82],"assumed":[84],"unknown":[87,189],"priori.":[89],"develop":[91],"an":[92,157],"original":[93],"approach":[94,177],"applicable":[100],"when":[101],"corresponding":[103],"has":[106],"structure":[108],"that,":[109],"de":[110],"pending":[111],"parameter":[115],"values,":[116],"optimal":[118,137,223],"solution":[119],"one":[121],"prescribed":[124],"finite":[125,154],"set":[126,155],"policies.":[128],"such":[130,233],"settings,":[131],"we":[132],"propose":[133],"learn":[135],"policy":[138,151,171,183,196,224],"for":[139,166,184,238],"by":[143,178,221],"employing":[144],"suitable":[146],"meta-policy":[147],"treats":[149],"from":[152],"arm":[158],"different":[161],"single-arm":[169],"selection":[170],"optimal.":[173],"demonstrate":[175],"developing":[179],"novel":[181],"sensing":[182],"opportunistic":[185],"spectrum":[186],"access":[187],"dynamic":[190],"channels.":[191],"prove":[193],"our":[195],"achieves":[197],"near-logarithmic":[198],"regret":[199],"(the":[200],"difference":[201],"compared":[205],"model-aware":[208],"genie),":[209],"leads":[211],"same":[214],"average":[215],"can":[218],"achieved":[220],"under":[225],"model.":[228],"This":[229],"first":[232],"result":[234],"literature":[237],"non":[240],"RMAB.":[242]},"counts_by_year":[{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":8},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":10}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
