{"id":"https://openalex.org/W2787204030","doi":"https://doi.org/10.1109/ssci.2017.8280911","title":"Efficient actor-critic algorithm with dual piecewise model learning","display_name":"Efficient actor-critic algorithm with dual piecewise model learning","publication_year":2017,"publication_date":"2017-11-01","ids":{"openalex":"https://openalex.org/W2787204030","doi":"https://doi.org/10.1109/ssci.2017.8280911","mag":"2787204030"},"language":"en","primary_location":{"id":"doi:10.1109/ssci.2017.8280911","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ssci.2017.8280911","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Symposium Series on Computational Intelligence (SSCI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101902909","display_name":"Shan Zhong","orcid":"https://orcid.org/0000-0003-0034-6952"},"institutions":[{"id":"https://openalex.org/I21741975","display_name":"Changshu Institute of Technology","ror":"https://ror.org/05g6ben79","country_code":"CN","type":"education","lineage":["https://openalex.org/I21741975"]},{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]},{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shan Zhong","raw_affiliation_strings":["Jiangsu Province Key Laboratory of Intelligent Building Energy Efficiency, Suzhou University of Science and Technology, Suzhou, Jiangsu","Ministry of Education, Jilin University, Changchun","School of Computer Science and Engineering, Changshu Institute of Technology, Changshu","School of Computer Science and Technology, Soochow University, Suzhou, Jiangsu"],"affiliations":[{"raw_affiliation_string":"Jiangsu Province Key Laboratory of Intelligent Building Energy Efficiency, Suzhou University of Science and Technology, Suzhou, Jiangsu","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"Ministry of Education, Jilin University, Changchun","institution_ids":["https://openalex.org/I194450716"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Changshu Institute of Technology, Changshu","institution_ids":["https://openalex.org/I21741975"]},{"raw_affiliation_string":"School of Computer Science and Technology, Soochow University, Suzhou, Jiangsu","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100414574","display_name":"Quan Liu","orcid":"https://orcid.org/0000-0002-8710-1810"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]},{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Liu","raw_affiliation_strings":["Ministry of Education, Jilin University, Changchun","School of Computer Science and Technology, Soochow University, Suzhou, Jiangsu"],"affiliations":[{"raw_affiliation_string":"Ministry of Education, Jilin University, Changchun","institution_ids":["https://openalex.org/I194450716"]},{"raw_affiliation_string":"School of Computer Science and Technology, Soochow University, Suzhou, Jiangsu","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038799336","display_name":"Shengrong Gong","orcid":"https://orcid.org/0000-0003-0266-2422"},"institutions":[{"id":"https://openalex.org/I21741975","display_name":"Changshu Institute of Technology","ror":"https://ror.org/05g6ben79","country_code":"CN","type":"education","lineage":["https://openalex.org/I21741975"]},{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengrong Gong","raw_affiliation_strings":["School of Computer Science and Engineering, Changshu Institute of Technology, Changshu","School of Computer Science and Technology, Soochow University, Suzhou, Jiangsu"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Changshu Institute of Technology, Changshu","institution_ids":["https://openalex.org/I21741975"]},{"raw_affiliation_string":"School of Computer Science and Technology, Soochow University, Suzhou, Jiangsu","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101997111","display_name":"Qiming Fu","orcid":"https://orcid.org/0000-0002-8720-9071"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiming Fu","raw_affiliation_strings":["College of Electronic & Information Engineering, Suzhou University of Science and Technology, Suzhou, Jiangsu","Ministry of Education, Jilin University, Changchun"],"affiliations":[{"raw_affiliation_string":"College of Electronic & Information Engineering, Suzhou University of Science and Technology, Suzhou, Jiangsu","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"Ministry of Education, Jilin University, Changchun","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078388990","display_name":"Jin Xu","orcid":"https://orcid.org/0000-0003-0087-1757"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Xu","raw_affiliation_strings":["School of Computer Science and Technology, Soochow University, Suzhou, Jiangsu"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Soochow University, Suzhou, Jiangsu","institution_ids":["https://openalex.org/I3923682"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101902909"],"corresponding_institution_ids":["https://openalex.org/I194450716","https://openalex.org/I21741975","https://openalex.org/I308837","https://openalex.org/I3923682"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20512945,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9749000072479248,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9121000170707703,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/piecewise","display_name":"Piecewise","score":0.8910186886787415},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7025080919265747},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5696403980255127},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.5247507095336914},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4915796220302582},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.45711493492126465},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4420269727706909},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.42482906579971313},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.4124155044555664},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4000963568687439},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.34488290548324585},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3249583840370178}],"concepts":[{"id":"https://openalex.org/C164660894","wikidata":"https://www.wikidata.org/wiki/Q2037833","display_name":"Piecewise","level":2,"score":0.8910186886787415},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7025080919265747},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5696403980255127},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.5247507095336914},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4915796220302582},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.45711493492126465},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4420269727706909},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.42482906579971313},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.4124155044555664},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4000963568687439},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.34488290548324585},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3249583840370178},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ssci.2017.8280911","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ssci.2017.8280911","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Symposium Series on Computational Intelligence (SSCI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.550000011920929,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W137325057","https://openalex.org/W1491843047","https://openalex.org/W1626155273","https://openalex.org/W1758031947","https://openalex.org/W1870822514","https://openalex.org/W1979638690","https://openalex.org/W1982262386","https://openalex.org/W2048226872","https://openalex.org/W2072931156","https://openalex.org/W2112483970","https://openalex.org/W2121863487","https://openalex.org/W2132351269","https://openalex.org/W2141559645","https://openalex.org/W2158316397","https://openalex.org/W2290354866","https://openalex.org/W2912453235","https://openalex.org/W2963302368","https://openalex.org/W4245108548","https://openalex.org/W6605656211","https://openalex.org/W6638058698","https://openalex.org/W6758390756"],"related_works":["https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W3099153698","https://openalex.org/W4297873223","https://openalex.org/W3009457412","https://openalex.org/W2350784623"],"abstract_inverted_index":{"The":[0,68,109,129,172],"actor-critic":[1],"(AC)":[2],"algorithm":[3],"is":[4,72,87,151,162],"a":[5,30],"class":[6],"of":[7,20,78,93,112,141,155,190],"important":[8],"reinforcement":[9],"learning":[10,47,53,56],"(RL)":[11],"methods":[12,187],"commonly":[13],"used":[14],"in":[15,24,153,164,188],"continuous":[16,170],"MDPs.":[17,171],"However,":[18],"few":[19],"its":[21],"variants":[22],"concern":[23],"sample":[25,194],"efficiency.":[26,195],"This":[27],"paper":[28],"proposed":[29],"AC":[31,42],"variant,":[32],"called":[33],"AC-DPML,":[34],"aiming":[35],"at":[36],"handling":[37],"RL":[38,167],"problems":[39],"by":[40,104,138],"combining":[41],"with":[43,169],"dual":[44],"piecewise":[45,51,61,66,70,85,144,158],"model":[46,52,62,71,86],"and":[48,63,102,132,182,193],"planning.":[49],"Dual":[50],"stands":[54],"for":[55],"two":[57,114,165,179],"models,":[58],"the":[59,64,76,79,83,91,94,105,113,120,126,133,139,142,148,156,178,185],"state-based":[60,69,143],"action-based":[65,84,157],"model.":[67,159],"established":[73],"according":[74],"to":[75],"division":[77,92],"state":[80],"space,":[81],"while":[82],"built":[88],"depending":[89],"on":[90],"action":[95],"space.":[96],"Both":[97],"models":[98,115,180],"are":[99,116,135],"linearly":[100],"approximated":[101],"learned":[103],"samples":[106],"attributed":[107],"to.":[108],"planning":[110,140],"processes":[111],"launched":[117],"only":[118,147],"if":[119],"prediction":[121],"errors":[122],"do":[123],"not":[124],"exceed":[125],"error":[127],"threshold.":[128],"value":[130,149],"function":[131,150],"policy":[134],"further":[136],"updated":[137,152],"model,":[145],"but":[146],"that":[154,175],"Experimentally,":[160],"AC-DPML":[161,176],"implemented":[163],"classic":[166],"benchmarks":[168],"results":[173],"demonstrate":[174],"coordinates":[177],"perfectly":[181],"it":[183],"outperforms":[184],"representative":[186],"terms":[189],"convergence":[191],"rate":[192]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
