{"id":"https://openalex.org/W2899328950","doi":"https://doi.org/10.1109/tnnls.2018.2871361","title":"Stable and Efficient Policy Evaluation","display_name":"Stable and Efficient Policy Evaluation","publication_year":2018,"publication_date":"2018-10-30","ids":{"openalex":"https://openalex.org/W2899328950","doi":"https://doi.org/10.1109/tnnls.2018.2871361","mag":"2899328950","pmid":"https://pubmed.ncbi.nlm.nih.gov/30387743"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2018.2871361","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2018.2871361","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025744129","display_name":"Daoming Lyu","orcid":"https://orcid.org/0000-0003-2625-9865"},"institutions":[{"id":"https://openalex.org/I82497590","display_name":"Auburn University","ror":"https://ror.org/02v80fc35","country_code":"US","type":"education","lineage":["https://openalex.org/I82497590"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Daoming Lyu","raw_affiliation_strings":["Department of Computer Science and Software Engineering, Auburn University, Auburn, AL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Software Engineering, Auburn University, Auburn, AL, USA","institution_ids":["https://openalex.org/I82497590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030361565","display_name":"Bo Liu","orcid":"https://orcid.org/0000-0003-2519-6196"},"institutions":[{"id":"https://openalex.org/I82497590","display_name":"Auburn University","ror":"https://ror.org/02v80fc35","country_code":"US","type":"education","lineage":["https://openalex.org/I82497590"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bo Liu","raw_affiliation_strings":["Department of Computer Science and Software Engineering, Auburn University, Auburn, AL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Software Engineering, Auburn University, Auburn, AL, USA","institution_ids":["https://openalex.org/I82497590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110482875","display_name":"Matthieu Geist","orcid":null},"institutions":[{"id":"https://openalex.org/I90183372","display_name":"Universit\u00e9 de Lorraine","ror":"https://ror.org/04vfs2w97","country_code":"FR","type":"education","lineage":["https://openalex.org/I90183372"]},{"id":"https://openalex.org/I4210145544","display_name":"Laboratoire Interdisciplinaire des Environnements Continentaux","ror":"https://ror.org/05je79696","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I4210107625","https://openalex.org/I4210145544","https://openalex.org/I4210148025","https://openalex.org/I4387156182","https://openalex.org/I90183372","https://openalex.org/I90183372"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Matthieu Geist","raw_affiliation_strings":["Laboratoire Interdisciplinaire des Environnements Continentaux, Centre national de la recherche scientifique, Universit\u00e9 de Lorraine, Metz, France"],"affiliations":[{"raw_affiliation_string":"Laboratoire Interdisciplinaire des Environnements Continentaux, Centre national de la recherche scientifique, Universit\u00e9 de Lorraine, Metz, France","institution_ids":["https://openalex.org/I4210145544","https://openalex.org/I90183372"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047724274","display_name":"Wen Dong","orcid":"https://orcid.org/0000-0001-8923-2227"},"institutions":[{"id":"https://openalex.org/I63190737","display_name":"University at Buffalo, State University of New York","ror":"https://ror.org/01y64my43","country_code":"US","type":"education","lineage":["https://openalex.org/I63190737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wen Dong","raw_affiliation_strings":["Department of Computer Science and Engineering, The State University of New York at Buffalo, Buffalo, NY, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The State University of New York at Buffalo, Buffalo, NY, USA","institution_ids":["https://openalex.org/I63190737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110660235","display_name":"Sa\u00e2d Biaz","orcid":null},"institutions":[{"id":"https://openalex.org/I82497590","display_name":"Auburn University","ror":"https://ror.org/02v80fc35","country_code":"US","type":"education","lineage":["https://openalex.org/I82497590"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Saad Biaz","raw_affiliation_strings":["Department of Computer Science and Software Engineering, Auburn University, Auburn, AL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Software Engineering, Auburn University, Auburn, AL, USA","institution_ids":["https://openalex.org/I82497590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100341321","display_name":"Qi Wang","orcid":"https://orcid.org/0000-0002-7028-4956"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Wang","raw_affiliation_strings":["School of Computer Science and the Center for OPTical IMagery Analysis and Learning, Northwestern Polytechnical University, Xi\u2019an, China","School of Computer Science and the Center for OPTical IMagery Analysis and Learning, Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and the Center for OPTical IMagery Analysis and Learning, Northwestern Polytechnical University, Xi\u2019an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"School of Computer Science and the Center for OPTical IMagery Analysis and Learning, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5025744129"],"corresponding_institution_ids":["https://openalex.org/I82497590"],"apc_list":null,"apc_paid":null,"fwci":0.6058,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.71363781,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"30","issue":"6","first_page":"1831","last_page":"1840"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9729999899864197,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.6931928992271423},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6873629689216614},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6675765514373779},{"id":"https://openalex.org/keywords/oblique-case","display_name":"Oblique case","score":0.5200580954551697},{"id":"https://openalex.org/keywords/policy-analysis","display_name":"Policy analysis","score":0.47535058856010437},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.4591997563838959},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.45204076170921326},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.4373228847980499},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3718850612640381},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35649609565734863},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33950483798980713},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2001962959766388},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.1142737865447998},{"id":"https://openalex.org/keywords/public-administration","display_name":"Public administration","score":0.07415589690208435}],"concepts":[{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.6931928992271423},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6873629689216614},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6675765514373779},{"id":"https://openalex.org/C160697094","wikidata":"https://www.wikidata.org/wiki/Q1233197","display_name":"Oblique case","level":2,"score":0.5200580954551697},{"id":"https://openalex.org/C123587114","wikidata":"https://www.wikidata.org/wiki/Q2101508","display_name":"Policy analysis","level":2,"score":0.47535058856010437},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.4591997563838959},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.45204076170921326},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.4373228847980499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3718850612640381},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35649609565734863},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33950483798980713},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2001962959766388},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.1142737865447998},{"id":"https://openalex.org/C3116431","wikidata":"https://www.wikidata.org/wiki/Q31728","display_name":"Public administration","level":1,"score":0.07415589690208435},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2018.2871361","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2018.2871361","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:30387743","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/30387743","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5299999713897705,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1506342804","https://openalex.org/W1514587017","https://openalex.org/W1547925194","https://openalex.org/W1646707810","https://openalex.org/W1941556277","https://openalex.org/W2048687352","https://openalex.org/W2075268401","https://openalex.org/W2101533993","https://openalex.org/W2106261932","https://openalex.org/W2121703796","https://openalex.org/W2132351269","https://openalex.org/W2139418546","https://openalex.org/W2154761920","https://openalex.org/W2292896113","https://openalex.org/W2395162158","https://openalex.org/W2727279496","https://openalex.org/W2772589676","https://openalex.org/W2800877957","https://openalex.org/W2951143668","https://openalex.org/W2953189830","https://openalex.org/W2963674921","https://openalex.org/W4214717370","https://openalex.org/W6630907848","https://openalex.org/W6632901617","https://openalex.org/W6640533243","https://openalex.org/W6675078014","https://openalex.org/W6676024505","https://openalex.org/W6676072908","https://openalex.org/W6677370284","https://openalex.org/W6677984395","https://openalex.org/W6682499586","https://openalex.org/W6711807133","https://openalex.org/W6715102896","https://openalex.org/W6751035886","https://openalex.org/W6765836026"],"related_works":["https://openalex.org/W2320608497","https://openalex.org/W2067516463","https://openalex.org/W2070726643","https://openalex.org/W2134999447","https://openalex.org/W2734376513","https://openalex.org/W4205144164","https://openalex.org/W2615919102","https://openalex.org/W2186686745","https://openalex.org/W2962772812","https://openalex.org/W2001916836"],"abstract_inverted_index":{"Policy":[0],"evaluation":[1],"algorithms":[2,71,84],"are":[3,21,72,76,86],"essential":[4],"to":[5,9,12,32,48],"reinforcement":[6],"learning":[7],"due":[8],"their":[10],"ability":[11],"predict":[13],"the":[14,53,62,65,95,107,110],"performance":[15],"of":[16,109],"a":[17],"policy.":[18],"However,":[19],"there":[20],"two":[22],"long-standing":[23],"issues":[24],"lying":[25],"in":[26,52],"this":[27],"prediction":[28],"problem":[29],"that":[30,85],"need":[31],"be":[33],"tackled:":[34],"off-policy":[35,59,73,88],"stability":[36],"and":[37,68,90],"on-policy":[38,54,78,91],"efficiency.":[39],"The":[40,99],"conventional":[41],"temporal":[42],"difference":[43],"(TD)":[44],"algorithm":[45],"is":[46,57],"known":[47],"perform":[49],"very":[50],"well":[51],"setting,":[55],"yet":[56],"not":[58,77],"stable.":[60],"On":[61],"other":[63],"hand,":[64],"gradient":[66],"TD":[67,70],"emphatic":[69],"stable,":[74],"but":[75],"efficient.":[79],"This":[80],"paper":[81],"introduces":[82],"novel":[83],"both":[87],"stable":[89],"efficient":[92],"by":[93],"using":[94],"oblique":[96],"projection":[97],"method.":[98],"empirical":[100],"experimental":[101],"results":[102],"on":[103],"various":[104],"domains":[105],"validate":[106],"effectiveness":[108],"proposed":[111],"approach.":[112]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
