{"id":"https://openalex.org/W4285198473","doi":"https://doi.org/10.1109/tg.2022.3177598","title":"Dynamically Interrupting Deadlocks in Game Learning Using Multisampling Multiarmed Bandits","display_name":"Dynamically Interrupting Deadlocks in Game Learning Using Multisampling Multiarmed Bandits","publication_year":2022,"publication_date":"2022-05-24","ids":{"openalex":"https://openalex.org/W4285198473","doi":"https://doi.org/10.1109/tg.2022.3177598"},"language":"en","primary_location":{"id":"doi:10.1109/tg.2022.3177598","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tg.2022.3177598","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048441212","display_name":"Rendong Chen","orcid":"https://orcid.org/0000-0002-3901-9708"},"institutions":[{"id":"https://openalex.org/I202126657","display_name":"Qufu Normal University","ror":"https://ror.org/03ceheh96","country_code":"CN","type":"education","lineage":["https://openalex.org/I202126657"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rendong Chen","raw_affiliation_strings":["School of Mathematical Sciences, Qufu Normal University, Qufu, China"],"raw_orcid":"https://orcid.org/0000-0002-3901-9708","affiliations":[{"raw_affiliation_string":"School of Mathematical Sciences, Qufu Normal University, Qufu, China","institution_ids":["https://openalex.org/I202126657"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112446534","display_name":"Fa Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210163926","display_name":"Zhejiang Medicine (China)","ror":"https://ror.org/05h86nk22","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210163926"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fa Wu","raw_affiliation_strings":["Zhejiang Demetics Medical Technology Co., Ltd., Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Demetics Medical Technology Co., Ltd., Hangzhou, China","institution_ids":["https://openalex.org/I4210163926"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5048441212"],"corresponding_institution_ids":["https://openalex.org/I202126657"],"apc_list":null,"apc_paid":null,"fwci":0.1695,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.52662893,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"15","issue":"3","first_page":"360","last_page":"367"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6785755753517151},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6074703335762024},{"id":"https://openalex.org/keywords/deadlock","display_name":"Deadlock","score":0.5716797113418579},{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.49825501441955566},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.49440574645996094},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4812731146812439},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4261893630027771},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3374224901199341},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3233899176120758},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2717617452144623},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.24559175968170166},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.18088027834892273}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6785755753517151},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6074703335762024},{"id":"https://openalex.org/C159023740","wikidata":"https://www.wikidata.org/wiki/Q623276","display_name":"Deadlock","level":2,"score":0.5716797113418579},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.49825501441955566},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.49440574645996094},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4812731146812439},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4261893630027771},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3374224901199341},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3233899176120758},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2717617452144623},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.24559175968170166},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.18088027834892273}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tg.2022.3177598","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tg.2022.3177598","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G8575241752","display_name":null,"funder_award_id":"12001220","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W19239227","https://openalex.org/W1191599655","https://openalex.org/W1258105458","https://openalex.org/W1528133536","https://openalex.org/W1555005026","https://openalex.org/W1569296262","https://openalex.org/W1625390266","https://openalex.org/W1785811943","https://openalex.org/W1794753160","https://openalex.org/W1823461189","https://openalex.org/W1888434271","https://openalex.org/W1973885534","https://openalex.org/W1998498767","https://openalex.org/W2009551863","https://openalex.org/W2029250042","https://openalex.org/W2032854309","https://openalex.org/W2038263585","https://openalex.org/W2054497239","https://openalex.org/W2068637807","https://openalex.org/W2077902449","https://openalex.org/W2090108951","https://openalex.org/W2103715332","https://openalex.org/W2122422466","https://openalex.org/W2135664069","https://openalex.org/W2168405694","https://openalex.org/W2295349279","https://openalex.org/W2400128071","https://openalex.org/W2522489477","https://openalex.org/W2738669288","https://openalex.org/W2769948160","https://openalex.org/W2921249625","https://openalex.org/W2950303912","https://openalex.org/W2950929549","https://openalex.org/W2962927562","https://openalex.org/W3034377858","https://openalex.org/W4206731730","https://openalex.org/W4287185275","https://openalex.org/W4293396018","https://openalex.org/W4294413807","https://openalex.org/W4298053223","https://openalex.org/W6600746971","https://openalex.org/W6627932998","https://openalex.org/W6631626818","https://openalex.org/W6633161741","https://openalex.org/W6634103402","https://openalex.org/W6636578284","https://openalex.org/W6638159179","https://openalex.org/W6675657435","https://openalex.org/W6677973377","https://openalex.org/W6692366259","https://openalex.org/W6693239863","https://openalex.org/W6760452739"],"related_works":["https://openalex.org/W4376155396","https://openalex.org/W2971351794","https://openalex.org/W1947085858","https://openalex.org/W2101991911","https://openalex.org/W2174986909","https://openalex.org/W2527791220","https://openalex.org/W2155070487","https://openalex.org/W4311589891","https://openalex.org/W4292701710","https://openalex.org/W814774360"],"abstract_inverted_index":{"In":[0],"many":[1],"reinforcement":[2,62],"learning":[3,27,166],"(RL)":[4],"game":[5,63,168],"tasks,":[6],"an":[7,41,123],"episode":[8],"should":[9],"be":[10,83],"interrupted":[11],"after":[12],"a":[13,23,49,98,104,164],"certain":[14],"time,":[15],"as":[16],"the":[17,32,58,66,87,90,118,137,149,155,159],"agent":[18,161],"could":[19],"sometimes":[20],"fall":[21],"into":[22],"deadlock":[24,59],"state.":[25],"The":[26,93,144],"process":[28,106],"is":[29,37,97,115],"sensitive":[30],"to":[31,39,55,85,154],"interruption":[33,76],"length.":[34],"So,":[35],"it":[36],"hard":[38],"determine":[40],"optimal":[42,75],"value":[43],"of":[44,74,89,127,158],"interruption.":[45],"This":[46],"article":[47],"presents":[48],"novel":[50],"multiarmed":[51],"bandit":[52,120],"(MAB)":[53],"model":[54,96],"dynamically":[56],"interrupt":[57],"state":[60],"in":[61,107,167],"learning,":[64],"with":[65,103],"assumption":[67],"that":[68,81,148],"there":[69],"are":[70],"neither":[71],"prior":[72,79],"knowledge":[73,80],"setting,":[77,121],"nor":[78],"can":[82,152],"used":[84],"improve":[86],"performance":[88,157],"RL":[91,160],"agent.":[92],"proposed":[94,116],"MAB":[95,101],"nonoblivious":[99],"adversarial":[100],"problem":[102],"multisampling":[105],"each":[108],"round.":[109],"An":[110],"efficient":[111],"algorithm":[112,138],"named":[113],"Exp3.P.MS":[114],"for":[117],"new":[119],"achieving":[122],"expected":[124],"regret":[125],"bound":[126],"<inline-formula":[128],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[129,141],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[130],"notation=\"LaTeX\">$\\mathcal":[131],"{O}(\\sqrt{nK\\ln":[132],"{(K)}})$</tex-math></inline-formula>":[133],".":[134,143],"We":[135],"run":[136],"on":[139],"<italic":[140],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Sokoban</i>":[142],"experimental":[145],"results":[146],"show":[147],"dynamic":[150],"interruptions":[151],"adapt":[153],"weak-to-strong":[156],"and":[162],"spur":[163],"fast":[165],"training.":[169]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}