{"id":"https://openalex.org/W2172183234","doi":"https://doi.org/10.1109/ijcnn.2008.4633942","title":"A novel heuristic Q-learning algorithm for solving stochastic games","display_name":"A novel heuristic Q-learning algorithm for solving stochastic games","publication_year":2008,"publication_date":"2008-06-01","ids":{"openalex":"https://openalex.org/W2172183234","doi":"https://doi.org/10.1109/ijcnn.2008.4633942","mag":"2172183234"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2008.4633942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2008.4633942","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100410902","display_name":"Jianwei Li","orcid":"https://orcid.org/0000-0003-4505-6860"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianwei Li","raw_affiliation_strings":["Department of Computer Science and Engineering, Yunnan University, Kunming, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Yunnan University, Kunming, China","institution_ids":["https://openalex.org/I189210763"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014515981","display_name":"Weiyi Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiyi Liu","raw_affiliation_strings":["Department of Computer Science and Engineering, Yunnan University, Kunming, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Yunnan University, Kunming, China","institution_ids":["https://openalex.org/I189210763"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100410902"],"corresponding_institution_ids":["https://openalex.org/I189210763"],"apc_list":null,"apc_paid":null,"fwci":0.5721,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.80019539,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1135","last_page":"1144"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.651314377784729},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6496267914772034},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.46415671706199646},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.45002633333206177},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4146578907966614},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.349650502204895},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.33425915241241455},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.19449758529663086},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18934521079063416}],"concepts":[{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.651314377784729},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6496267914772034},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.46415671706199646},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.45002633333206177},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4146578907966614},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.349650502204895},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33425915241241455},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.19449758529663086},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18934521079063416}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn.2008.4633942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2008.4633942","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1507087299","https://openalex.org/W1519783625","https://openalex.org/W1539216098","https://openalex.org/W1541730457","https://openalex.org/W1542941925","https://openalex.org/W1595634327","https://openalex.org/W1807884544","https://openalex.org/W1931792391","https://openalex.org/W1998754086","https://openalex.org/W2007645463","https://openalex.org/W2028145673","https://openalex.org/W2067050450","https://openalex.org/W2107726111","https://openalex.org/W2119567691","https://openalex.org/W2120846115","https://openalex.org/W2122410182","https://openalex.org/W2124846360","https://openalex.org/W2131600418","https://openalex.org/W2140649624","https://openalex.org/W2147750403","https://openalex.org/W2149398074","https://openalex.org/W2150339816","https://openalex.org/W2160135758","https://openalex.org/W2248307931","https://openalex.org/W2334782222","https://openalex.org/W2341171179","https://openalex.org/W2623293810","https://openalex.org/W2787259794","https://openalex.org/W2949773717","https://openalex.org/W3011120880","https://openalex.org/W3026901963","https://openalex.org/W4205834900","https://openalex.org/W4233696721","https://openalex.org/W4254630254","https://openalex.org/W4255047891","https://openalex.org/W6631168379","https://openalex.org/W6680337228","https://openalex.org/W6682178038","https://openalex.org/W6683733446","https://openalex.org/W6684620542"],"related_works":["https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2053286651","https://openalex.org/W2181743346","https://openalex.org/W2187401768","https://openalex.org/W2181413294","https://openalex.org/W2989452537","https://openalex.org/W2052122378","https://openalex.org/W2320718078","https://openalex.org/W4384263527"],"abstract_inverted_index":{"We":[0,59],"solve":[1,53],"Nash":[2,29],"equilibrium":[3,30],"of":[4,21,68],"stochastic":[5,34,54],"games":[6,55],"using":[7],"heuristic":[8,63,80,140,145],"Q-learning":[9,46,141],"method":[10,47,114,130],"based":[11,48],"on":[12,49,90],"\u201cheuristic":[13],"learning\u201d":[14],"+":[15],"\u201c":[16],"Q-learning\u201d":[17],"under":[18],"the":[19,40,66,78,86],"framework":[20,137],"noncooperative":[22],"general-sum":[23],"games.":[24],"Determining":[25],"whether":[26],"a":[27,33,135],"strategy":[28],"exists":[31],"in":[32,82],"game":[35,41,71],"is":[36,42],"NP-hard":[37],"even":[38],"if":[39],"finite.":[43],"Therefore":[44],"normal":[45],"iterative":[50,127],"learning":[51,74,121,146],"can\u2019t":[52],"with":[56],"larger":[57],"scale.":[58],"attempt":[60],"to":[61,84,142],"make":[62],"evaluations":[64],"for":[65,138],"rewards":[67],"each":[69],"stage":[70],"encountered":[72],"during":[73],"and":[75,98,104,119],"improve":[76],"continually":[77],"relevant":[79],"Q-values":[81],"order":[83],"approach":[85],"optimal":[87],"learning.":[88],"Based":[89],"such":[91],"thought,":[92],"we":[93],"proposed":[94],"Multi-agent":[95],"Heuristic":[96],"Q-Learning(MHQL)method":[97],"proved":[99],"that":[100,112],"its":[101],"correctness,":[102],"convergence":[103,125],"acceptable":[105],"solving":[106],"time":[107],"complexity.":[108],"The":[109],"experimentation":[110],"shows":[111],"our":[113],"can":[115,131],"drastically":[116],"decrease":[117],"inefficient":[118],"repetitive":[120],"thus":[122],"speed":[123],"up":[124],"than":[126],"Q-learning.":[128],"Our":[129],"be":[132],"regarded":[133],"as":[134],"basic":[136],"general":[139],"design":[143],"better":[144],"rules.":[147]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}