{"id":"https://openalex.org/W2079550602","doi":"https://doi.org/10.1145/2185395.2185430","title":"The multi-armed bandit, with constraints","display_name":"The multi-armed bandit, with constraints","publication_year":2012,"publication_date":"2012-03-09","ids":{"openalex":"https://openalex.org/W2079550602","doi":"https://doi.org/10.1145/2185395.2185430","mag":"2079550602"},"language":"en","primary_location":{"id":"doi:10.1145/2185395.2185430","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2185395.2185430","pdf_url":null,"source":{"id":"https://openalex.org/S4210187660","display_name":"ACM SIGMETRICS Performance Evaluation Review","issn_l":"0163-5999","issn":["0163-5999","1557-9484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGMETRICS Performance Evaluation Review","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043553685","display_name":"Eric V. Denardo","orcid":null},"institutions":[{"id":"https://openalex.org/I32971472","display_name":"Yale University","ror":"https://ror.org/03v76x132","country_code":"US","type":"education","lineage":["https://openalex.org/I32971472"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eric V. Denardo","raw_affiliation_strings":["Yale University, New Haven, CT","YALE UNIVERSITY, New Haven, CT"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Yale University, New Haven, CT","institution_ids":["https://openalex.org/I32971472"]},{"raw_affiliation_string":"YALE UNIVERSITY, New Haven, CT","institution_ids":["https://openalex.org/I32971472"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013021251","display_name":"Eugene A. Feinberg","orcid":"https://orcid.org/0000-0002-8263-0772"},"institutions":[{"id":"https://openalex.org/I59553526","display_name":"Stony Brook University","ror":"https://ror.org/05qghxh33","country_code":"US","type":"education","lineage":["https://openalex.org/I59553526"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eugene A. Feinberg","raw_affiliation_strings":["Stony Brook University, Stony Brook, NY"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Stony Brook University, Stony Brook, NY","institution_ids":["https://openalex.org/I59553526"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044612344","display_name":"Uriel G. Rothblum","orcid":null},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Uriel G. Rothblum","raw_affiliation_strings":["Technion - Israel Institute of Technology, Haifa, Israel"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technion - Israel Institute of Technology, Haifa, Israel","institution_ids":["https://openalex.org/I174306211"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17072524,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"39","issue":"4","first_page":"39","last_page":"39"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.816082239151001},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7502583265304565},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5728245377540588},{"id":"https://openalex.org/keywords/partially-observable-markov-decision-process","display_name":"Partially observable Markov decision process","score":0.5358869433403015},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.518543541431427},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.505441427230835},{"id":"https://openalex.org/keywords/multi-armed-bandit","display_name":"Multi-armed bandit","score":0.48644042015075684},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.4576295018196106},{"id":"https://openalex.org/keywords/mathematical-proof","display_name":"Mathematical proof","score":0.45409223437309265},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4432584345340729},{"id":"https://openalex.org/keywords/additive-markov-chain","display_name":"Additive Markov chain","score":0.4264601469039917},{"id":"https://openalex.org/keywords/markov-property","display_name":"Markov property","score":0.40276622772216797},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.3597952723503113},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3421483337879181},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2063387632369995},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1299617886543274},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.06540688872337341}],"concepts":[{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.816082239151001},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7502583265304565},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5728245377540588},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.5358869433403015},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.518543541431427},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.505441427230835},{"id":"https://openalex.org/C123197309","wikidata":"https://www.wikidata.org/wiki/Q2882343","display_name":"Multi-armed bandit","level":3,"score":0.48644042015075684},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.4576295018196106},{"id":"https://openalex.org/C108710211","wikidata":"https://www.wikidata.org/wiki/Q11538","display_name":"Mathematical proof","level":2,"score":0.45409223437309265},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4432584345340729},{"id":"https://openalex.org/C96810086","wikidata":"https://www.wikidata.org/wiki/Q17003273","display_name":"Additive Markov chain","level":5,"score":0.4264601469039917},{"id":"https://openalex.org/C189973286","wikidata":"https://www.wikidata.org/wiki/Q176695","display_name":"Markov property","level":4,"score":0.40276622772216797},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.3597952723503113},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3421483337879181},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2063387632369995},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1299617886543274},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.06540688872337341},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2185395.2185430","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2185395.2185430","pdf_url":null,"source":{"id":"https://openalex.org/S4210187660","display_name":"ACM SIGMETRICS Performance Evaluation Review","issn_l":"0163-5999","issn":["0163-5999","1557-9484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGMETRICS Performance Evaluation Review","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W166250193","https://openalex.org/W1518931405","https://openalex.org/W1549884163","https://openalex.org/W1551911640","https://openalex.org/W1567724294","https://openalex.org/W1965208040","https://openalex.org/W1990256806","https://openalex.org/W1991591460","https://openalex.org/W1995442741","https://openalex.org/W1996859119","https://openalex.org/W2001015965","https://openalex.org/W2025702744","https://openalex.org/W2045374782","https://openalex.org/W2103783883","https://openalex.org/W2118309135","https://openalex.org/W2129239159","https://openalex.org/W2138657485","https://openalex.org/W2144002623","https://openalex.org/W2159471113","https://openalex.org/W2166619657","https://openalex.org/W2317700292","https://openalex.org/W2499002200","https://openalex.org/W2531735017"],"related_works":["https://openalex.org/W2096013579","https://openalex.org/W1589140671","https://openalex.org/W1760611253","https://openalex.org/W52153049","https://openalex.org/W2951545791","https://openalex.org/W1515117609","https://openalex.org/W2294884454","https://openalex.org/W4323315247","https://openalex.org/W3169161914","https://openalex.org/W4321379664"],"abstract_inverted_index":{"The":[0,39,51,219,367,399],"colorfully-named":[1],"and":[2,34,79,150,281,296,314,358,387,411,438],"much-studied":[3],"multi-armed":[4,124,157,468],"bandit":[5,125,158,181],"is":[6,55,159,174,201,255,264,275,327,342,361,427],"the":[7,22,81,90,93,100,104,117,123,156,188,209,216,226,268,310,318,325,334,346,381,384,424,431,435,442,449,463],"following":[8],"Markov":[9,29,40,52,95,101,118,210],"decision":[10,19,102,170],"problem:":[11],"At":[12],"epochs":[13],"1,":[14],"2,":[15],"...":[16],",":[17],"a":[18,112,135,166,202,229,239,271,303,321,364,419],"maker":[20],"observes":[21],"current":[23,49,77,213],"state":[24,78,82,113,178,214,248],"of":[25,27,37,92,99,116,122,138,169,179,225,228,234,241,249,270,312,320,348,441,452],"each":[26,115,177,180,206,247,250],"several":[28,222,374],"chains":[30,41,96],"with":[31,470],"rewards":[32,386,440],"(bandits)":[33],"plays":[35,208],"one":[36,59,245],"them.":[38],"that":[42,54,72,160,183,332],"are":[43,106,194,288,371,404,414,445,460],"not":[44],"played":[45,56],"remain":[46],"in":[47,195,455],"their":[48,407],"states.":[50],"chain":[53,211],"evolves":[57],"for":[58,114,155,246,277,302,391,467],"transition":[60,64,85,388,436],"according":[61],"to":[62,83,88,144,165,176,266,290,297,329,344,416,429,447],"its":[63,76,299],"probabilities,":[65],"earning":[66],"an":[67,292,339,349,378],"immediate":[68],"reward":[69],"(possibly":[70],"negative)":[71],"can":[73,162],"depend":[74],"upon":[75],"on":[80,238,373,409],"which":[84],"occurs.":[86],"Henceforth,":[87],"distinguish":[89],"states":[91,186],"individual":[94,392],"from":[97],"those":[98],"problem,":[103],"latter":[105],"called":[107,380,402],"multi-states.":[108],"Each":[109],"multi-state":[110],"prescribes":[111],"chains.":[119],"This":[120,274],"version":[121],"problem":[126],"was":[127],"originally":[128],"solved":[129],"by":[130,394,462],"John":[131],"Gittins.":[132],"It":[133],"has":[134,215],"large":[136],"range":[137],"operations":[139,287],"research":[140],"applications":[141,143],"including":[142],"resource":[145],"allocation,":[146],"scheduling,":[147],"project":[148],"management,":[149],"search.":[151],"A":[152,172,198,252],"key":[153],"result":[154],"attention":[161],"be":[163],"restricted":[164],"simple":[167],"class":[168],"procedures.":[171],"label":[173],"assigned":[175],"such":[182],"no":[184],"two":[185],"have":[187],"same":[189],"label,":[190],"even":[191],"if":[192],"they":[193],"different":[196,223,253],"bandits.":[197,335,433],"priority":[199,230,272,294,356],"rule":[200,295],"policy":[203,341],"that,":[204],"given":[205,304,420],"multistate,":[207],"whose":[212],"lowest":[217],"label.":[218],"literature":[220],"includes":[221],"proofs":[224,236],"optimality":[227,269],"rule.":[231,273],"Nearly":[232],"all":[233,406],"these":[235],"rest":[237],"family":[240],"optimal":[242,262,293,340],"stopping":[243],"times,":[244],"bandit.":[251],"approach":[254],"taken":[256],"here.":[257],"Pair-wise":[258],"comparison,":[259],"rather":[260],"than":[261],"stopping,":[263],"used":[265,289,446,461],"demonstrate":[267],"accomplished":[276],"models":[278],"having":[279],"linear":[280,313,322],"exponential":[282,315],"utility":[283,301,323],"functions.":[284],"Elementary":[285],"row":[286,397],"identify":[291],"compute":[298,448],"expected":[300],"starting":[305],"state.":[306],"Our":[307],"analysis":[308],"covers":[309],"cases":[311],"utilities.":[316],"In":[317],"case":[319],"function,":[324],"model":[326],"generalized":[328],"include":[330],"constraints":[331],"link":[333],"With":[336],"C":[337,353],"constraints,":[338],"shown":[343],"take":[345],"form":[347],"initial":[350],"randomization":[351],"over":[352],"+":[354],"1":[355],"rules,":[357],"column":[359,464],"generation":[360,465],"proposed":[362,368],"as":[363],"solution":[365],"method.":[366],"computational":[369],"methods":[370],"based":[372],"matrix":[375],"algorithms.":[376],"First,":[377],"algorithm,":[379],"Triangularizer,":[382],"transforms":[383],"one-step":[385,439],"probability":[389],"matrixes":[390],"bandits":[393,426,444,469],"applying":[395],"elementary":[396],"operations.":[398],"transformed":[400,425,443],"matrixes,":[401],"finalized,":[403],"triangle:":[405],"elements":[408],"diagonals":[410,413],"below":[412],"equal":[415],"zero.":[417],"For":[418],"index":[421,453],"policy,":[422],"running":[423,430],"equivalent":[428],"original":[432],"Second,":[434],"probabilities":[437],"performance":[450],"characteristics":[451],"policies":[454],"polynomial":[456],"times.":[457],"These":[458],"computations":[459],"algorithm":[466],"constraints.":[471]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
