{"id":"https://openalex.org/W4385488495","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191982","title":"Evolving Constrained Reinforcement Learning Policy","display_name":"Evolving Constrained Reinforcement Learning Policy","publication_year":2023,"publication_date":"2023-06-18","ids":{"openalex":"https://openalex.org/W4385488495","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191982"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn54540.2023.10191982","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10191982","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076937094","display_name":"Chengpeng Hu","orcid":"https://orcid.org/0000-0001-8343-4186"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chengpeng Hu","raw_affiliation_strings":["Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology,Shenzhen,China","Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology, Shenzhen, China","Department of Computer Science and Engineering, Guangdong Key Laboratory of Brain-inspired Intelligent Computation, Southern University of Science and Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology,Shenzhen,China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Guangdong Key Laboratory of Brain-inspired Intelligent Computation, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073126067","display_name":"Jiyuan Pei","orcid":"https://orcid.org/0000-0001-9860-5160"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiyuan Pei","raw_affiliation_strings":["Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology,Shenzhen,China","Department of Computer Science and Engineering, Guangdong Key Laboratory of Brain-inspired Intelligent Computation, Southern University of Science and Technology, Shenzhen, China","Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology,Shenzhen,China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Guangdong Key Laboratory of Brain-inspired Intelligent Computation, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100456636","display_name":"Jialin Liu","orcid":"https://orcid.org/0000-0001-7047-8454"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jialin Liu","raw_affiliation_strings":["Southern University of Science and Technology,Guangdong Key Laboratory of Brain-inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China","Department of Computer Science and Engineering, Guangdong Key Laboratory of Brain-inspired Intelligent Computation, Southern University of Science and Technology, Shenzhen, China","Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Guangdong Key Laboratory of Brain-inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Guangdong Key Laboratory of Brain-inspired Intelligent Computation, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100635494","display_name":"Xin Yao","orcid":"https://orcid.org/0000-0001-8837-4442"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Yao","raw_affiliation_strings":["Southern University of Science and Technology,Guangdong Key Laboratory of Brain-inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China","Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology, Shenzhen, China","Department of Computer Science and Engineering, Guangdong Key Laboratory of Brain-inspired Intelligent Computation, Southern University of Science and Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Guangdong Key Laboratory of Brain-inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Research Institute of Trustworthy Autonomous Systems (RITAS), Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Guangdong Key Laboratory of Brain-inspired Intelligent Computation, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5076937094"],"corresponding_institution_ids":["https://openalex.org/I3045169105"],"apc_list":null,"apc_paid":null,"fwci":0.1748,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.54465231,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10551","display_name":"Scheduling and Optimization Algorithms","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9702000021934509,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8780097961425781},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7429469227790833},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.6528249979019165},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.6171118021011353},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5542094111442566},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5159791111946106},{"id":"https://openalex.org/keywords/constraint-programming","display_name":"Constraint programming","score":0.4997696876525879},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4642316699028015},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.46232014894485474},{"id":"https://openalex.org/keywords/constraint-satisfaction","display_name":"Constraint satisfaction","score":0.425251841545105},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3882007300853729},{"id":"https://openalex.org/keywords/stochastic-programming","display_name":"Stochastic programming","score":0.16263261437416077},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12318947911262512},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.11532235145568848}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8780097961425781},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7429469227790833},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.6528249979019165},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.6171118021011353},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5542094111442566},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5159791111946106},{"id":"https://openalex.org/C173404611","wikidata":"https://www.wikidata.org/wiki/Q528588","display_name":"Constraint programming","level":3,"score":0.4997696876525879},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4642316699028015},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.46232014894485474},{"id":"https://openalex.org/C44616089","wikidata":"https://www.wikidata.org/wiki/Q30158686","display_name":"Constraint satisfaction","level":3,"score":0.425251841545105},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3882007300853729},{"id":"https://openalex.org/C137631369","wikidata":"https://www.wikidata.org/wiki/Q7617831","display_name":"Stochastic programming","level":2,"score":0.16263261437416077},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12318947911262512},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.11532235145568848},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn54540.2023.10191982","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10191982","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4565357659","display_name":null,"funder_award_id":"JCYJ20190809121403553","funder_id":"https://openalex.org/F4320329791","funder_display_name":"Shenzhen Fundamental Research Program"},{"id":"https://openalex.org/G8890439619","display_name":null,"funder_award_id":"62250710682,61906083","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329791","display_name":"Shenzhen Fundamental Research Program","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W182470170","https://openalex.org/W1777239053","https://openalex.org/W1845972764","https://openalex.org/W2070570138","https://openalex.org/W2145339207","https://openalex.org/W2151339633","https://openalex.org/W2155262811","https://openalex.org/W2158782408","https://openalex.org/W2295986874","https://openalex.org/W2312339748","https://openalex.org/W2596367596","https://openalex.org/W2766447205","https://openalex.org/W2781726626","https://openalex.org/W2804791273","https://openalex.org/W2894662639","https://openalex.org/W2951278471","https://openalex.org/W2951799422","https://openalex.org/W2994712737","https://openalex.org/W2998619042","https://openalex.org/W3099303207","https://openalex.org/W3120345700","https://openalex.org/W3132081946","https://openalex.org/W3183892796","https://openalex.org/W3189576409","https://openalex.org/W3198149465","https://openalex.org/W3216080622","https://openalex.org/W4226292595","https://openalex.org/W4287725923","https://openalex.org/W4288359891","https://openalex.org/W4293545785","https://openalex.org/W4301501993","https://openalex.org/W4308177782","https://openalex.org/W6638088447","https://openalex.org/W6639175102","https://openalex.org/W6697228413","https://openalex.org/W6737893269","https://openalex.org/W6747473740","https://openalex.org/W6751725685","https://openalex.org/W6755903938","https://openalex.org/W6761920647","https://openalex.org/W6771280675","https://openalex.org/W6780587392","https://openalex.org/W6784293744","https://openalex.org/W6788085919","https://openalex.org/W6798240405","https://openalex.org/W6810891056"],"related_works":["https://openalex.org/W2097415053","https://openalex.org/W1514298554","https://openalex.org/W1486916474","https://openalex.org/W4243324743","https://openalex.org/W1531664660","https://openalex.org/W2120647900","https://openalex.org/W1496100162","https://openalex.org/W4303044088","https://openalex.org/W1169406866","https://openalex.org/W4313469555"],"abstract_inverted_index":{"Evolutionary":[0],"algorithms":[1],"have":[2],"been":[3],"used":[4],"to":[5,11,22,39,113],"evolve":[6],"a":[7,59,89,96],"population":[8],"of":[9,91,121],"actors":[10],"generate":[12],"diverse":[13],"experiences":[14],"for":[15],"training":[16],"reinforcement":[17,63],"learning":[18,64],"agents,":[19],"which":[20,67],"helps":[21],"tackle":[23],"the":[24,31,44,47,70,80,84,119],"temporal":[25],"credit":[26],"assignment":[27],"problem":[28],"and":[29,49,72,78,125],"improves":[30],"exploration":[32],"efficiency.":[33],"However,":[34],"when":[35],"adapting":[36],"this":[37,55],"approach":[38],"address":[40],"constrained":[41,62],"problems,":[42],"balancing":[43],"trade-off":[45],"between":[46],"reward":[48,71],"constraint":[50,73,97,126],"violation":[51,74],"is":[52],"hard.":[53],"In":[54],"paper,":[56],"we":[57],"propose":[58],"novel":[60],"evolutionary":[61],"(ECRL)":[65],"algorithm,":[66],"adaptively":[68],"balances":[69],"with":[75,95],"stochastic":[76,123],"ranking,":[77],"at":[79],"same":[81],"time,":[82],"restricts":[83],"policy's":[85],"behaviour":[86],"by":[87],"maintaining":[88],"set":[90],"Lagrange":[92],"relaxation":[93],"coefficients":[94],"buffer.":[98,127],"Extensive":[99],"experiments":[100],"on":[101],"robotic":[102],"control":[103],"benchmarks":[104],"show":[105],"that":[106],"our":[107],"ECRL":[108],"achieves":[109],"outstanding":[110],"performance":[111],"compared":[112],"state-of-the-art":[114],"algorithms.":[115],"Ablation":[116],"analysis":[117],"shows":[118],"benefits":[120],"introducing":[122],"ranking":[124]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
