{"id":"https://openalex.org/W3089859340","doi":"https://doi.org/10.1109/ijcnn48605.2020.9207446","title":"An Improved Minimax-Q Algorithm Based on Generalized Policy Iteration to Solve a Chaser-Invader Game","display_name":"An Improved Minimax-Q Algorithm Based on Generalized Policy Iteration to Solve a Chaser-Invader Game","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3089859340","doi":"https://doi.org/10.1109/ijcnn48605.2020.9207446","mag":"3089859340"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn48605.2020.9207446","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9207446","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051746991","display_name":"Minsong Liu","orcid":"https://orcid.org/0009-0003-0822-1377"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Minsong Liu","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","The State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"The State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080193690","display_name":"Yuanheng Zhu","orcid":"https://orcid.org/0000-0001-5384-423X"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanheng Zhu","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","The State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"The State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100624298","display_name":"Dongbin Zhao","orcid":"https://orcid.org/0000-0001-8218-9633"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongbin Zhao","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","The State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"The State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5051746991"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879","https://openalex.org/I4210100255","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.5302,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.73107609,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9514999985694885,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9430000185966492,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.775213360786438},{"id":"https://openalex.org/keywords/minimax","display_name":"Minimax","score":0.7724114656448364},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.664841890335083},{"id":"https://openalex.org/keywords/monte-carlo-tree-search","display_name":"Monte Carlo tree search","score":0.6401658654212952},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6141194105148315},{"id":"https://openalex.org/keywords/zero-sum-game","display_name":"Zero-sum game","score":0.6108658909797668},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6006548404693604},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.5960710644721985},{"id":"https://openalex.org/keywords/game-theory","display_name":"Game theory","score":0.5772070288658142},{"id":"https://openalex.org/keywords/game-tree","display_name":"Game tree","score":0.4777340590953827},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.4719376266002655},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.45325690507888794},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.43314722180366516},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.43085744976997375},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4257248342037201},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4117434024810791},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.364299476146698},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.36033034324645996},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.32352933287620544},{"id":"https://openalex.org/keywords/sequential-game","display_name":"Sequential game","score":0.3200400769710541},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23061302304267883},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.12494966387748718}],"concepts":[{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.775213360786438},{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.7724114656448364},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.664841890335083},{"id":"https://openalex.org/C46149586","wikidata":"https://www.wikidata.org/wiki/Q11785332","display_name":"Monte Carlo tree search","level":3,"score":0.6401658654212952},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6141194105148315},{"id":"https://openalex.org/C136356330","wikidata":"https://www.wikidata.org/wiki/Q156612","display_name":"Zero-sum game","level":3,"score":0.6108658909797668},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6006548404693604},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.5960710644721985},{"id":"https://openalex.org/C177142836","wikidata":"https://www.wikidata.org/wiki/Q44455","display_name":"Game theory","level":2,"score":0.5772070288658142},{"id":"https://openalex.org/C95815963","wikidata":"https://www.wikidata.org/wiki/Q1377033","display_name":"Game tree","level":4,"score":0.4777340590953827},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.4719376266002655},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.45325690507888794},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.43314722180366516},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.43085744976997375},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4257248342037201},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4117434024810791},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.364299476146698},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.36033034324645996},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32352933287620544},{"id":"https://openalex.org/C73795354","wikidata":"https://www.wikidata.org/wiki/Q287618","display_name":"Sequential game","level":3,"score":0.3200400769710541},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23061302304267883},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.12494966387748718},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn48605.2020.9207446","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9207446","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.699999988079071,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1485630385","https://openalex.org/W1519783625","https://openalex.org/W1526009983","https://openalex.org/W1542941925","https://openalex.org/W1757796397","https://openalex.org/W2145339207","https://openalex.org/W2164637474","https://openalex.org/W2257979135","https://openalex.org/W2550833987","https://openalex.org/W2766447205","https://openalex.org/W2886498421","https://openalex.org/W2904455790","https://openalex.org/W2910246453","https://openalex.org/W2931903779","https://openalex.org/W2942517366","https://openalex.org/W2951437627","https://openalex.org/W2963890729","https://openalex.org/W3007689182","https://openalex.org/W3123212791","https://openalex.org/W4288374897","https://openalex.org/W4298857966","https://openalex.org/W6631168379","https://openalex.org/W6637967152","https://openalex.org/W6684470913","https://openalex.org/W6760625672","https://openalex.org/W6763938161"],"related_works":["https://openalex.org/W2101748387","https://openalex.org/W1511927616","https://openalex.org/W3110979110","https://openalex.org/W3160714878","https://openalex.org/W3125878904","https://openalex.org/W1574738838","https://openalex.org/W2996059179","https://openalex.org/W1514842472","https://openalex.org/W3089859340","https://openalex.org/W2126090518"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,81,90,123],"use":[4,82,106],"reinforcement":[5],"learning":[6],"and":[7,78,141],"zero-sum":[8,99],"games":[9],"to":[10,46,65,96,110],"solve":[11,66,97,111],"a":[12,18,47,92],"Chaser-Invader":[13],"game,":[14],"which":[15,39],"is":[16,40],"actually":[17],"Markov":[19,27],"game":[20,44],"(MG).":[21],"Different":[22],"from":[23],"the":[24,34,58,67,98,104,112,126],"single":[25],"agent":[26,105],"Decision":[28],"Process":[29],"(MDP),":[30],"MG":[31],"can":[32,72,129],"realize":[33],"interaction":[35],"of":[36,43,86],"multiple":[37],"agents,":[38],"an":[41,53],"extension":[42],"theory":[45],"MDP":[48],"environment.":[49],"This":[50,101],"paper":[51],"proposes":[52],"improved":[54,127],"algorithm":[55,71,128],"based":[56],"on":[57],"classical":[59],"Minimax-Q":[60,70],"algorithm.":[61],"First,":[62],"in":[63,138,148],"order":[64],"problem":[68],"where":[69],"only":[73],"be":[74],"applied":[75],"for":[76],"discrete":[77],"simple":[79,139],"environment,":[80],"Deep":[83],"Q-network":[84],"instead":[85],"traditional":[87],"Q-learning.":[88],"Second,":[89],"propose":[91],"generalized":[93],"policy":[94],"iteration":[95],"game.":[100],"method":[102,109],"makes":[103],"linear":[107],"programming":[108],"Nash":[113],"equilibrium":[114],"action":[115],"at":[116],"each":[117],"moment.":[118],"Finally,":[119],"through":[120],"comparative":[121],"experiments,":[122],"prove":[124],"that":[125],"perform":[130],"as":[131,133],"well":[132],"Monte":[134,144],"Carlo":[135,145],"Tree":[136,146],"Search":[137,147],"environments":[140],"better":[142],"than":[143],"complex":[149],"environments.":[150]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
