{"id":"https://openalex.org/W4390706036","doi":"https://doi.org/10.1109/tnnls.2023.3346992","title":"Approximate Policy Iteration With Deep Minimax Average Bellman Error Minimization","display_name":"Approximate Policy Iteration With Deep Minimax Average Bellman Error Minimization","publication_year":2024,"publication_date":"2024-01-09","ids":{"openalex":"https://openalex.org/W4390706036","doi":"https://doi.org/10.1109/tnnls.2023.3346992","pmid":"https://pubmed.ncbi.nlm.nih.gov/38194389"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2023.3346992","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3346992","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086788173","display_name":"Lican Kang","orcid":"https://orcid.org/0000-0003-3136-9225"},"institutions":[{"id":"https://openalex.org/I4210126319","display_name":"Duke-NUS Medical School","ror":"https://ror.org/02j1m6098","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596","https://openalex.org/I170897317","https://openalex.org/I4210126319"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Lican Kang","raw_affiliation_strings":["Cardiovascular and Metabolic Disorders Program, Duke-NUS Medical School, 8 College Road, Singapore"],"affiliations":[{"raw_affiliation_string":"Cardiovascular and Metabolic Disorders Program, Duke-NUS Medical School, 8 College Road, Singapore","institution_ids":["https://openalex.org/I4210126319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102730501","display_name":"Yuhui Liu","orcid":"https://orcid.org/0009-0001-5899-8745"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhui Liu","raw_affiliation_strings":["School of Mathematics and Statistics, Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Statistics, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100452550","display_name":"Yuan Luo","orcid":"https://orcid.org/0000-0003-0195-7456"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Luo","raw_affiliation_strings":["School of Mathematics and Statistics, Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Statistics, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057896052","display_name":"Jerry Zhijian Yang","orcid":"https://orcid.org/0000-0002-0402-4056"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jerry Zhijian Yang","raw_affiliation_strings":["School of Mathematics and Statistics and the Hubei Key Laboratory of Computational Science, Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Mathematics and Statistics and the Hubei Key Laboratory of Computational Science, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100462708","display_name":"Han Yuan","orcid":"https://orcid.org/0000-0002-2674-6068"},"institutions":[{"id":"https://openalex.org/I4210126319","display_name":"Duke-NUS Medical School","ror":"https://ror.org/02j1m6098","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596","https://openalex.org/I170897317","https://openalex.org/I4210126319"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Han Yuan","raw_affiliation_strings":["Centre for Quantitative Medicine, Duke-NUS Medical School, 8 College Road, Singapore"],"affiliations":[{"raw_affiliation_string":"Centre for Quantitative Medicine, Duke-NUS Medical School, 8 College Road, Singapore","institution_ids":["https://openalex.org/I4210126319"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040330350","display_name":"Chang Jun Zhu","orcid":"https://orcid.org/0009-0007-2771-5507"},"institutions":[{"id":"https://openalex.org/I4210160344","display_name":"Tongji Hospital","ror":"https://ror.org/04xy45965","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210160344"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang Zhu","raw_affiliation_strings":["Department of Anesthesiology, Tongji Hospital, Tongji Medical College, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Department of Anesthesiology, Tongji Hospital, Tongji Medical College, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641","https://openalex.org/I4210160344"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5086788173"],"corresponding_institution_ids":["https://openalex.org/I4210126319"],"apc_list":null,"apc_paid":null,"fwci":0.7301,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.72494996,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"36","issue":"2","first_page":"2288","last_page":"2299"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/minimax","display_name":"Minimax","score":0.9150345325469971},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.7535070180892944},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5092107057571411},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.5075842142105103},{"id":"https://openalex.org/keywords/approximation-error","display_name":"Approximation error","score":0.4302067458629608},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3506808578968048},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.3222655653953552}],"concepts":[{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.9150345325469971},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.7535070180892944},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5092107057571411},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5075842142105103},{"id":"https://openalex.org/C122383733","wikidata":"https://www.wikidata.org/wiki/Q865920","display_name":"Approximation error","level":2,"score":0.4302067458629608},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3506808578968048},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3222655653953552}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2023.3346992","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3346992","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:38194389","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38194389","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.8100000023841858}],"awards":[{"id":"https://openalex.org/G6921455745","display_name":null,"funder_award_id":"12071362","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8797177794","display_name":null,"funder_award_id":"12125103","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":91,"referenced_works":["https://openalex.org/W1564755532","https://openalex.org/W1889629917","https://openalex.org/W2021440127","https://openalex.org/W2046513829","https://openalex.org/W2075672181","https://openalex.org/W2078750760","https://openalex.org/W2104753538","https://openalex.org/W2107726111","https://openalex.org/W2112264645","https://openalex.org/W2118556122","https://openalex.org/W2124477018","https://openalex.org/W2144446635","https://openalex.org/W2145339207","https://openalex.org/W2147303334","https://openalex.org/W2155007355","https://openalex.org/W2176263492","https://openalex.org/W2194775991","https://openalex.org/W2397607997","https://openalex.org/W2487501366","https://openalex.org/W2528305538","https://openalex.org/W2586680856","https://openalex.org/W2749772809","https://openalex.org/W2754517384","https://openalex.org/W2768956845","https://openalex.org/W2791797404","https://openalex.org/W2794109133","https://openalex.org/W2800415562","https://openalex.org/W2883364792","https://openalex.org/W2910568379","https://openalex.org/W2946302218","https://openalex.org/W2949883708","https://openalex.org/W2952298682","https://openalex.org/W2962736495","https://openalex.org/W2963245453","https://openalex.org/W2963626582","https://openalex.org/W2964179661","https://openalex.org/W2965192329","https://openalex.org/W2966349401","https://openalex.org/W2966477753","https://openalex.org/W2968831808","https://openalex.org/W2973682480","https://openalex.org/W2999571325","https://openalex.org/W3020075106","https://openalex.org/W3033882778","https://openalex.org/W3084746729","https://openalex.org/W3100366369","https://openalex.org/W3102511045","https://openalex.org/W3103559770","https://openalex.org/W3125407111","https://openalex.org/W3135642434","https://openalex.org/W3169375224","https://openalex.org/W3199799665","https://openalex.org/W3204236384","https://openalex.org/W4205779441","https://openalex.org/W4211133403","https://openalex.org/W4214717370","https://openalex.org/W4226257065","https://openalex.org/W4249716558","https://openalex.org/W4287212802","https://openalex.org/W4297964528","https://openalex.org/W4301084947","https://openalex.org/W6603123960","https://openalex.org/W6633472939","https://openalex.org/W6634413486","https://openalex.org/W6637597983","https://openalex.org/W6638018090","https://openalex.org/W6638698769","https://openalex.org/W6639430172","https://openalex.org/W6679100793","https://openalex.org/W6679257226","https://openalex.org/W6680729576","https://openalex.org/W6682849425","https://openalex.org/W6683356630","https://openalex.org/W6685322675","https://openalex.org/W6712524481","https://openalex.org/W6719647531","https://openalex.org/W6727147046","https://openalex.org/W6732665253","https://openalex.org/W6748651261","https://openalex.org/W6751002639","https://openalex.org/W6752578362","https://openalex.org/W6757677476","https://openalex.org/W6760905529","https://openalex.org/W6761973403","https://openalex.org/W6763000687","https://openalex.org/W6774679390","https://openalex.org/W6781960742","https://openalex.org/W6782866703","https://openalex.org/W6801019524","https://openalex.org/W6841060495","https://openalex.org/W6922480057"],"related_works":["https://openalex.org/W2016058626","https://openalex.org/W2474724840","https://openalex.org/W2963760573","https://openalex.org/W185788778","https://openalex.org/W2895916002","https://openalex.org/W1814049089","https://openalex.org/W1977348009","https://openalex.org/W2369683208","https://openalex.org/W3182145356","https://openalex.org/W2370315493"],"abstract_inverted_index":{"In":[0],"this":[1,157],"work,":[2],"we":[3,72,97,128],"investigate":[4],"the":[5,15,20,32,41,54,58,65,79,84,93,100,107,117,138,153,166,169,192,195,202,207,221,225,228,233],"utilization":[6],"of":[7,22,38,57,119,140,206,227,236],"deep":[8,123],"approximate":[9],"policy":[10,87],"iteration":[11],"(DAPI)":[12],"in":[13,88,137,165],"estimating":[14],"optimal":[16],"action-value":[17],"function":[18,81],"within":[19,152],"context":[21],"reinforcement":[23],"learning,":[24],"employing":[25],"rectified":[26],"linear":[27],"unit":[28],"(ReLU)":[29],"ResNet":[30,51,136,151],"as":[31,143,145,191,214],"underlying":[33],"framework.":[34],"The":[35,181],"iterative":[36],"process":[37,121,235],"DAPI":[39],"incorporates":[40],"minimax":[42],"average":[43],"Bellman":[44,59,94],"error":[45,70,75,184],"minimization":[46],"principle.":[47],"It":[48],"employs":[49],"ReLU":[50,135,150],"to":[52,161,177],"estimate":[53],"fixed":[55],"point":[56],"equation,":[60],"which":[61],"is":[62],"aligned":[63],"with":[64,106],"estimated":[66,80],"greedy":[67,86],"policy.":[68],"Through":[69],"propagation,":[71],"derive":[73],"nonasymptotic":[74,183],"bounds":[76,185,212],"between":[77],"and":[78,102,122,201,204],"induced":[82],"by":[83],"output":[85],"DAPI.":[89,237],"To":[90],"effectively":[91],"control":[92],"residual":[95],"error,":[96],"address":[98],"both":[99],"statistical":[101],"approximation":[103,124,147,158],"errors":[104],"associated":[105],"-mixing":[108],"dependent":[109,141],"data":[110],"derived":[111,182],"from":[112,173],"Markov":[113],"decision":[114],"processes,":[115],"using":[116],"techniques":[118],"empirical":[120],"theory,":[125],"respectively.":[126],"Furthermore,":[127],"present":[129],"a":[130,162,178],"novel":[131],"generalization":[132],"bound":[133,148,159],"for":[134,149,218],"presence":[139],"data,":[142],"well":[144],"an":[146,174],"H\u00f6lder":[154],"class.":[155],"Notably,":[156],"contributes":[160],"significant":[163],"improvement":[164],"dependence":[167],"on":[168,188],"ambient":[170,196],"dimension,":[171],"transitioning":[172],"exponential":[175],"relationship":[176],"polynomial":[179,199],"one.":[180],"explicitly":[186],"depend":[187],"factors":[189],"such":[190],"sample":[193],"size,":[194],"dimension":[197],"(in":[198],"terms),":[200],"width":[203],"depth":[205],"neural":[208],"networks.":[209],"Consequently,":[210],"these":[211],"serve":[213],"valuable":[215],"theoretical":[216],"guidelines":[217],"appropriately":[219],"setting":[220],"hyperparameters,":[222],"thereby":[223],"enabling":[224],"achievement":[226],"desired":[229],"convergence":[230],"rate":[231],"during":[232],"training":[234]},"counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
