{"id":"https://openalex.org/W2963007414","doi":"https://doi.org/10.1109/glocom.2011.6134244","title":"On the Combinatorial Multi-Armed Bandit Problem with Markovian Rewards","display_name":"On the Combinatorial Multi-Armed Bandit Problem with Markovian Rewards","publication_year":2011,"publication_date":"2011-12-01","ids":{"openalex":"https://openalex.org/W2963007414","doi":"https://doi.org/10.1109/glocom.2011.6134244","mag":"2963007414"},"language":"en","primary_location":{"id":"doi:10.1109/glocom.2011.6134244","is_oa":false,"landing_page_url":"https://doi.org/10.1109/glocom.2011.6134244","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE Global Telecommunications Conference - GLOBECOM 2011","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011040174","display_name":"Yi Gai","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yi Gai","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, University of Southern California, CA, USA"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, University of Southern California, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063784062","display_name":"Bhaskar Krishnamachari","orcid":"https://orcid.org/0000-0002-9994-9931"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"B. Krishnamachari","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, University of Southern California, CA, USA"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, University of Southern California, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101967011","display_name":"Mingyan Liu","orcid":"https://orcid.org/0000-0003-3295-9200"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mingyan Liu","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of Michigan, MI, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of Michigan, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5011040174"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":4.6554,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.94872399,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bipartite-graph","display_name":"Bipartite graph","score":0.7597161531448364},{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.747433066368103},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5785925984382629},{"id":"https://openalex.org/keywords/aperiodic-graph","display_name":"Aperiodic graph","score":0.5632542967796326},{"id":"https://openalex.org/keywords/logarithm","display_name":"Logarithm","score":0.5340297222137451},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5254104137420654},{"id":"https://openalex.org/keywords/time-complexity","display_name":"Time complexity","score":0.5203304290771484},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4996044635772705},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.480588436126709},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.4789212942123413},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.47509992122650146},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4562181234359741},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.4491191506385803},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.413968950510025},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3859219253063202},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.3414081335067749},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2952955961227417},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.22165820002555847},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.1932322382926941}],"concepts":[{"id":"https://openalex.org/C197657726","wikidata":"https://www.wikidata.org/wiki/Q174733","display_name":"Bipartite graph","level":3,"score":0.7597161531448364},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.747433066368103},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5785925984382629},{"id":"https://openalex.org/C104247578","wikidata":"https://www.wikidata.org/wiki/Q4779368","display_name":"Aperiodic graph","level":2,"score":0.5632542967796326},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.5340297222137451},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5254104137420654},{"id":"https://openalex.org/C311688","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Time complexity","level":2,"score":0.5203304290771484},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4996044635772705},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.480588436126709},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.4789212942123413},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.47509992122650146},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4562181234359741},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.4491191506385803},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.413968950510025},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3859219253063202},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.3414081335067749},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2952955961227417},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.22165820002555847},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.1932322382926941},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/glocom.2011.6134244","is_oa":false,"landing_page_url":"https://doi.org/10.1109/glocom.2011.6134244","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE Global Telecommunications Conference - GLOBECOM 2011","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1666230842","https://openalex.org/W1996069568","https://openalex.org/W2000080679","https://openalex.org/W2009551863","https://openalex.org/W2020254732","https://openalex.org/W2061753713","https://openalex.org/W2113733815","https://openalex.org/W2120015434","https://openalex.org/W2120125372","https://openalex.org/W2125724988","https://openalex.org/W2139964991","https://openalex.org/W2148250692","https://openalex.org/W2158807713","https://openalex.org/W2168405694","https://openalex.org/W2222512263","https://openalex.org/W2962764550","https://openalex.org/W4232620022"],"related_works":["https://openalex.org/W4376155396","https://openalex.org/W2971351794","https://openalex.org/W1947085858","https://openalex.org/W2101991911","https://openalex.org/W4287863949","https://openalex.org/W4287102143","https://openalex.org/W1850488217","https://openalex.org/W2945119207","https://openalex.org/W3182614517","https://openalex.org/W2151412766"],"abstract_inverted_index":{"We":[0,147,158],"consider":[1],"a":[2,18,67,144,149,165],"combinatorial":[3],"generalization":[4],"of":[5,22,92,101,181],"the":[6,55,72,80,89,98,102,118,121,130,135,179,208],"classical":[7],"multi-armed":[8],"bandit":[9],"problem":[10],"that":[11,38,69,97,125,139,160,167],"is":[12,17,34,59,78,86,108,168,187],"defined":[13,115],"as":[14,40,117],"follows.":[15],"There":[16],"given":[19,145],"bipartite":[20],"graph":[21],"M":[23],"users":[24,93,107,182],"and":[25,134,151,176,183,192,202],"N\u2265M":[26],"resources.":[27,184],"For":[28],"each":[29,53,75],"user-resource":[30],"pair":[31],"(i,j),":[32],"there":[33],"an":[35,41],"associated":[36],"state":[37,74],"evolves":[39],"aperiodic":[42],"irreducible":[43],"finite-state":[44],"Markov":[45],"chain":[46],"with":[47,50],"unknown":[48],"parameters,":[49],"transitions":[51],"occurring":[52],"time":[54,76,175],"particular":[56],"user":[57,64],"i":[58,65],"allocated":[60,79],"resource":[61,81],"j.":[62,82],"The":[63,83],"receives":[66],"reward":[68,124,138],"depends":[70],"on":[71],"corresponding":[73],"it":[77],"system":[84],"objective":[85],"to":[87,94,112,172,190],"learn":[88],"best":[90],"matching":[91,133],"resources":[95],"so":[96],"long-term":[99],"sum":[100],"rewards":[103],"received":[104],"by":[105,129,143],"all":[106],"maximized.":[109],"This":[110,185],"corresponds":[111],"minimizing":[113],"regret,":[114],"here":[116],"gap":[119],"between":[120],"expected":[122,136],"total":[123,137],"can":[126,140,163],"be":[127,141],"obtained":[128],"best-possible":[131],"static":[132],"achieved":[142],"algorithm.":[146],"present":[148],"polynomial-storage":[150],"polynomial-complexity-per-step":[152],"matching-learning":[153],"algorithm":[154,162],"for":[155],"this":[156,161],"problem.":[157],"show":[159],"achieve":[164],"regret":[166],"uniformly":[169],"arbitrarily":[170],"close":[171],"logarithmic":[173],"in":[174,178,195,207],"polynomial":[177],"number":[180],"formulation":[186],"broadly":[188],"applicable":[189],"scheduling":[191],"switching":[193],"problems":[194],"communication":[196],"networks":[197,201],"including":[198],"cognitive":[199],"radio":[200],"significantly":[203],"extends":[204],"prior":[205],"results":[206],"area.":[209]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":6}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
