{"id":"https://openalex.org/W4285607085","doi":"https://doi.org/10.24963/ijcai.2022/528","title":"Multi-Constraint Deep Reinforcement Learning for Smooth Action Control","display_name":"Multi-Constraint Deep Reinforcement Learning for Smooth Action Control","publication_year":2022,"publication_date":"2022-07-01","ids":{"openalex":"https://openalex.org/W4285607085","doi":"https://doi.org/10.24963/ijcai.2022/528"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2022/528","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2022/528","pdf_url":"https://www.ijcai.org/proceedings/2022/0528.pdf","source":{"id":"https://openalex.org/S4363608755","display_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.ijcai.org/proceedings/2022/0528.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022391513","display_name":"Guangyuan Zou","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guangyuan Zou","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University","Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)","College of Computer Science and Software Engineering, Shenzhen University, P.R. China","Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ), Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)","institution_ids":[]},{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, P.R. China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ), Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100389169","display_name":"Ying He","orcid":"https://orcid.org/0000-0002-6749-4485"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying He","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University","Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)","College of Computer Science and Software Engineering, Shenzhen University, P.R. China","Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ), Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)","institution_ids":[]},{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, P.R. China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ), Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100420016","display_name":"F. Richard Yu","orcid":"https://orcid.org/0000-0003-1006-7594"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"F. Richard Yu","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University","Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)","College of Computer Science and Software Engineering, Shenzhen University, P.R. China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)","institution_ids":[]},{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, P.R. China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102785196","display_name":"Longquan Chen","orcid":"https://orcid.org/0000-0002-4340-1360"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longquan Chen","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University","Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)","Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ), Shenzhen, China","College of Computer Science and Software Engineering, Shenzhen University, P.R. China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)","institution_ids":[]},{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ), Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, P.R. China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073490832","display_name":"Weike Pan","orcid":"https://orcid.org/0000-0001-6326-9531"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weike Pan","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University","College of Computer Science and Software Engineering, Shenzhen University, P.R. China","Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ), Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, P.R. China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ), Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100633973","display_name":"Zhong Ming","orcid":"https://orcid.org/0000-0001-9310-3460"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhong Ming","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University","College of Computer Science and Software Engineering, Shenzhen University, P.R. China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, P.R. China","institution_ids":["https://openalex.org/I180726961"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5022391513"],"corresponding_institution_ids":["https://openalex.org/I180726961","https://openalex.org/I4210136793"],"apc_list":null,"apc_paid":null,"fwci":0.1044,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.26719661,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"3802","last_page":"3808"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.947700023651123,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10768","display_name":"Electric Vehicles and Infrastructure","score":0.9079999923706055,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8781262636184692},{"id":"https://openalex.org/keywords/smoothness","display_name":"Smoothness","score":0.7720372676849365},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.69635409116745},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6866222620010376},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5612044930458069},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5341377258300781},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5299350619316101},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.42764997482299805},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33285361528396606},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16524815559387207}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8781262636184692},{"id":"https://openalex.org/C102634674","wikidata":"https://www.wikidata.org/wiki/Q868473","display_name":"Smoothness","level":2,"score":0.7720372676849365},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.69635409116745},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6866222620010376},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5612044930458069},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5341377258300781},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5299350619316101},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.42764997482299805},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33285361528396606},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16524815559387207},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2022/528","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2022/528","pdf_url":"https://www.ijcai.org/proceedings/2022/0528.pdf","source":{"id":"https://openalex.org/S4363608755","display_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2022/528","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2022/528","pdf_url":"https://www.ijcai.org/proceedings/2022/0528.pdf","source":{"id":"https://openalex.org/S4363608755","display_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Life in Land","id":"https://metadata.un.org/sdg/15","score":0.5799999833106995}],"awards":[{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1477544716","display_name":null,"funder_award_id":"Guangdong","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5053816963","display_name":null,"funder_award_id":"62002238","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7097245526","display_name":null,"funder_award_id":"61836005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4285607085.pdf","grobid_xml":"https://content.openalex.org/works/W4285607085.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W1518931405","https://openalex.org/W2145339207","https://openalex.org/W2171029115","https://openalex.org/W2462906003","https://openalex.org/W2736601468","https://openalex.org/W2804791273","https://openalex.org/W2904246096","https://openalex.org/W2951799221","https://openalex.org/W2963864421","https://openalex.org/W2994712737","https://openalex.org/W3032398409","https://openalex.org/W3045839081","https://openalex.org/W3162902207","https://openalex.org/W3176539729","https://openalex.org/W4200630651","https://openalex.org/W4214717370","https://openalex.org/W4287284530","https://openalex.org/W4287725923","https://openalex.org/W4287867830","https://openalex.org/W4287907838","https://openalex.org/W4293545785","https://openalex.org/W4295719664","https://openalex.org/W4297824337"],"related_works":["https://openalex.org/W2393022482","https://openalex.org/W2377346130","https://openalex.org/W2361092061","https://openalex.org/W2032233321","https://openalex.org/W3121970507","https://openalex.org/W2110028391","https://openalex.org/W54497855","https://openalex.org/W2319775965","https://openalex.org/W2357314690","https://openalex.org/W217960748"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2],"(DRL)":[3],"has":[4,118],"been":[5],"studied":[6],"in":[7,57,77],"a":[8,68,96],"variety":[9],"of":[10,43,54,75],"challenging":[11],"decision-making":[12],"tasks,":[13],"e.g.,":[14],"autonomous":[15],"driving.":[16],"\\textcolor{black}{However,":[17],"DRL":[18,107,130],"typically":[19],"suffers":[20],"from":[21],"the":[22,44,51,55,73,78,105,114,124],"action":[23,120],"shaking":[24],"problem,":[25],"which":[26],"means":[27],"that":[28,113],"agents":[29],"can":[30],"select":[31],"actions":[32,76],"with":[33,123],"big":[34],"difference":[35],"even":[36],"though":[37],"states":[38],"only":[39],"slightly":[40],"differ.}":[41],"One":[42],"crucial":[45],"reasons":[46],"for":[47],"this":[48,60,64],"issue":[49],"is":[50,134],"inappropriate":[52],"design":[53],"reward":[56],"DRL.":[58],"In":[59,92],"paper,":[61],"to":[62,71,89,103],"address":[63],"issue,":[65],"we":[66,81,94],"propose":[67,95],"novel":[69],"way":[70],"incorporate":[72],"smoothness":[74,121],"reward.":[79],"Specifically,":[80],"introduce":[82],"sub-rewards":[83],"and":[84,128],"add":[85],"multiple":[86],"constraints":[87],"related":[88],"these":[90],"sub-rewards.":[91],"addition,":[93],"multi-constraint":[97,106],"proximal":[98],"policy":[99],"optimization":[100],"(MCPPO)":[101],"method":[102,117],"solve":[104],"problem.":[108],"Extensive":[109],"simulation":[110],"results":[111],"show":[112],"proposed":[115],"MCPPO":[116],"better":[119],"compared":[122],"traditional":[125],"proportional-integral-differential":[126],"(PID)":[127],"mainstream":[129],"algorithms.":[131],"The":[132],"video":[133],"available":[135],"at":[136],"https://youtu.be/F2jpaSm7YOg.":[137]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
