{"id":"https://openalex.org/W2997055691","doi":"https://doi.org/10.1109/anzcc47194.2019.8945748","title":"Towards Q-learning the Whittle Index for Restless Bandits","display_name":"Towards Q-learning the Whittle Index for Restless Bandits","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W2997055691","doi":"https://doi.org/10.1109/anzcc47194.2019.8945748","mag":"2997055691"},"language":"en","primary_location":{"id":"doi:10.1109/anzcc47194.2019.8945748","is_oa":false,"landing_page_url":"https://doi.org/10.1109/anzcc47194.2019.8945748","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Australian &amp; New Zealand Control Conference (ANZCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034492629","display_name":"Jing Fu","orcid":"https://orcid.org/0000-0003-4615-8391"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Jing Fu","raw_affiliation_strings":["School of Mathematics and Statistics, University of Melbourne, Parkville, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Statistics, University of Melbourne, Parkville, VIC, Australia","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070445330","display_name":"Yoni Nazarathy","orcid":"https://orcid.org/0000-0003-3071-8462"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yoni Nazarathy","raw_affiliation_strings":["School of Mathematics and Physics, The University of Queensland, Australia"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Physics, The University of Queensland, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091752605","display_name":"Sarat Moka","orcid":"https://orcid.org/0000-0003-2868-9420"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Sarat Moka","raw_affiliation_strings":["School of Mathematics and Physics, The University of Queensland, Australia"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Physics, The University of Queensland, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060879494","display_name":"Peter Taylor","orcid":"https://orcid.org/0000-0001-7600-5383"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Peter G. Taylor","raw_affiliation_strings":["School of Mathematics and Statistics, University of Melbourne, Parkville, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Statistics, University of Melbourne, Parkville, VIC, Australia","institution_ids":["https://openalex.org/I165779595"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5034492629"],"corresponding_institution_ids":["https://openalex.org/I165779595"],"apc_list":null,"apc_paid":null,"fwci":1.4433,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.84268842,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"249","last_page":"254"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recursion","display_name":"Recursion (computer science)","score":0.8796443343162537},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.8332387208938599},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7122845649719238},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6445068120956421},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6191769242286682},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.5613885521888733},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.551408588886261},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.5425913333892822},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5306749939918518},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.48390325903892517},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.4391050934791565},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43350809812545776},{"id":"https://openalex.org/keywords/time-horizon","display_name":"Time horizon","score":0.4151507616043091},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.27281466126441956},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2541826665401459},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.15892791748046875}],"concepts":[{"id":"https://openalex.org/C168773036","wikidata":"https://www.wikidata.org/wiki/Q264164","display_name":"Recursion (computer science)","level":2,"score":0.8796443343162537},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.8332387208938599},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7122845649719238},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6445068120956421},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6191769242286682},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.5613885521888733},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.551408588886261},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.5425913333892822},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5306749939918518},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.48390325903892517},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.4391050934791565},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43350809812545776},{"id":"https://openalex.org/C28761237","wikidata":"https://www.wikidata.org/wiki/Q7805321","display_name":"Time horizon","level":2,"score":0.4151507616043091},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27281466126441956},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2541826665401459},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.15892791748046875},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/anzcc47194.2019.8945748","is_oa":false,"landing_page_url":"https://doi.org/10.1109/anzcc47194.2019.8945748","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Australian &amp; New Zealand Control Conference (ANZCC)","raw_type":"proceedings-article"},{"id":"pmh:oai:espace.library.uq.edu.au:UQ:ca3f4e3","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306402388","display_name":"Queensland's institutional digital repository (The University of Queensland)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I165143802","host_organization_name":"The University of Queensland","host_organization_lineage":["https://openalex.org/I165143802"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.800000011920929,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W594357522","https://openalex.org/W1576452626","https://openalex.org/W1850488217","https://openalex.org/W1986389067","https://openalex.org/W1991513691","https://openalex.org/W2044502527","https://openalex.org/W2056921512","https://openalex.org/W2105556121","https://openalex.org/W2135063910","https://openalex.org/W2141515329","https://openalex.org/W2165418472","https://openalex.org/W2511028367","https://openalex.org/W2522848032","https://openalex.org/W2791825310","https://openalex.org/W2795378140","https://openalex.org/W3020125231","https://openalex.org/W3104937890","https://openalex.org/W3123482145","https://openalex.org/W3124084009","https://openalex.org/W3125634603","https://openalex.org/W6639039794","https://openalex.org/W6749787346"],"related_works":["https://openalex.org/W1976679120","https://openalex.org/W2377519369","https://openalex.org/W3202311233","https://openalex.org/W4281614748","https://openalex.org/W32024214","https://openalex.org/W2997055691","https://openalex.org/W187740018","https://openalex.org/W2162286586","https://openalex.org/W3087810330","https://openalex.org/W4255368532"],"abstract_inverted_index":{"We":[0,140,157],"consider":[1],"the":[2,18,46,53,60,69,73,84,100,106,110,154,165,171,189,192],"multi-armed":[3],"restless":[4],"bandit":[5],"problem":[6],"(RMABP)":[7],"with":[8,22,145],"an":[9,65,168],"infinite":[10],"horizon":[11],"average":[12],"cost":[13],"objective.":[14],"Each":[15],"arm":[16],"of":[17,45,51,72,99,105,109,153,170,191],"RMABP":[19,74],"is":[20,75],"associated":[21,54],"a":[23,38,43,128,138],"Markov":[24],"process":[25],"that":[26],"operates":[27],"in":[28],"two":[29],"modes:":[30],"active":[31],"and":[32,92,130,187],"passive.":[33],"At":[34],"each":[35,146],"time":[36,175],"slot":[37],"controller":[39],"needs":[40],"to":[41,48,77,149],"designate":[42],"subset":[44],"arms":[47],"be":[49,78,94],"active,":[50],"which":[52,113],"processes":[55],"will":[56],"evolve":[57],"differently":[58],"from":[59,117],"passive":[61],"case.":[62],"Treated":[63],"as":[64,162,174],"optimal":[66,70,90],"control":[67,164,182],"problem,":[68],"solution":[71],"known":[76],"computationally":[79],"intractable.":[80],"In":[81,120],"many":[82],"cases,":[83],"Whittle":[85,101,155,172,194],"index":[86,173,195],"policy":[87],"achieves":[88],"near":[89],"performance":[91,190],"can":[93],"tractably":[95],"found.":[96],"Nevertheless,":[97],"computation":[98],"indices":[102],"requires":[103],"knowledge":[104],"transition":[107],"matrices":[108],"underlying":[111],"processes,":[112],"are":[114],"sometimes":[115],"hidden":[116],"decision":[118],"makers.":[119],"this":[121],"paper,":[122],"we":[123,163],"take":[124],"first":[125],"steps":[126],"towards":[127],"tractable":[129],"efficient":[131],"reinforcement":[132],"learning":[133,167],"algorithm":[134],"for":[135],"controlling":[136],"such":[137],"system.":[139],"setup":[141],"parallel":[142],"Q-learning":[143],"recursions,":[144],"recursion":[147],"mapping":[148],"individual":[150],"possible":[151],"values":[152],"index.":[156],"then":[158],"update":[159],"these":[160],"recursions":[161],"system,":[166],"approximation":[169],"evolves.":[176],"Tested":[177],"on":[178],"several":[179],"examples,":[180],"our":[181],"outperforms":[183],"naive":[184],"priority":[185],"allocations":[186],"nears":[188],"fully-informed":[193],"policy.":[196]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
