{"id":"https://openalex.org/W2765549618","doi":"https://doi.org/10.1007/978-3-319-70087-8_84","title":"Off-Policy Reinforcement Learning for Partially Unknown Nonzero-Sum Games","display_name":"Off-Policy Reinforcement Learning for Partially Unknown Nonzero-Sum Games","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2765549618","doi":"https://doi.org/10.1007/978-3-319-70087-8_84","mag":"2765549618"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-319-70087-8_84","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-319-70087-8_84","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049454999","display_name":"Qichao Zhang","orcid":"https://orcid.org/0000-0001-9747-391X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qichao Zhang","raw_affiliation_strings":["The State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, 100190, China","University of Chinese Academy of Sciences, Beijing, 100049, China"],"affiliations":[{"raw_affiliation_string":"The State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, 100190, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, 100049, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100624298","display_name":"Dongbin Zhao","orcid":"https://orcid.org/0000-0001-8218-9633"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongbin Zhao","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China","The State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, 100190, China","University of Chinese Academy of Sciences, Beijing, 100049, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"The State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, 100190, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, 100049, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101704266","display_name":"Sibo Zhang","orcid":"https://orcid.org/0000-0003-1037-3970"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sibo Zhang","raw_affiliation_strings":["University of Illinois Urbana-Champaign, Champaign, IL, 61801, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign, Champaign, IL, 61801, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5049454999"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879","https://openalex.org/I4210165038"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":null,"fwci":0.534,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.67906624,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"822","last_page":"830"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12047","display_name":"Viral Infections and Vectors","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/2725","display_name":"Infectious Diseases"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9610999822616577,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8989894390106201},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7766436338424683},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6810922026634216},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.50672847032547},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.48132097721099854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4077785313129425},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15998592972755432}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8989894390106201},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7766436338424683},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6810922026634216},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.50672847032547},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.48132097721099854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4077785313129425},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15998592972755432}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/978-3-319-70087-8_84","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-319-70087-8_84","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1793264465","https://openalex.org/W2012451615","https://openalex.org/W2014558518","https://openalex.org/W2019338681","https://openalex.org/W2055300044","https://openalex.org/W2081514674","https://openalex.org/W2183137222","https://openalex.org/W2312229020","https://openalex.org/W2330024298","https://openalex.org/W2339745223","https://openalex.org/W2344560087","https://openalex.org/W2475651303","https://openalex.org/W2480536132","https://openalex.org/W2586495351","https://openalex.org/W4234761190"],"related_works":["https://openalex.org/W2742483371","https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W3087814763","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W4376605461","https://openalex.org/W4400868993","https://openalex.org/W2361647908","https://openalex.org/W2952356279"],"abstract_inverted_index":null,"counts_by_year":[{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
