{"id":"https://openalex.org/W4396941629","doi":"https://doi.org/10.1109/tii.2024.3391934","title":"Safe Multiagent Learning With Soft Constrained Policy Optimization in Real Robot Control","display_name":"Safe Multiagent Learning With Soft Constrained Policy Optimization in Real Robot Control","publication_year":2024,"publication_date":"2024-05-15","ids":{"openalex":"https://openalex.org/W4396941629","doi":"https://doi.org/10.1109/tii.2024.3391934"},"language":"en","primary_location":{"id":"doi:10.1109/tii.2024.3391934","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tii.2024.3391934","pdf_url":null,"source":{"id":"https://openalex.org/S184777250","display_name":"IEEE Transactions on Industrial Informatics","issn_l":"1551-3203","issn":["1551-3203","1941-0050"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Industrial Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062677595","display_name":"Shangding Gu","orcid":"https://orcid.org/0000-0002-2722-3779"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Shangding Gu","raw_affiliation_strings":["School of Computation, Information and Technology, Technical University of Munich, Garching, Germany"],"affiliations":[{"raw_affiliation_string":"School of Computation, Information and Technology, Technical University of Munich, Garching, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102782165","display_name":"Dianye Huang","orcid":"https://orcid.org/0000-0001-7719-6505"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dianye Huang","raw_affiliation_strings":["School of Computation, Information and Technology, Technical University of Munich, Garching, Germany"],"affiliations":[{"raw_affiliation_string":"School of Computation, Information and Technology, Technical University of Munich, Garching, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049802452","display_name":"Muning Wen","orcid":"https://orcid.org/0009-0000-7868-1262"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Muning Wen","raw_affiliation_strings":["School of Electronic, Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic, Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100323037","display_name":"Guang Chen","orcid":"https://orcid.org/0000-0002-7416-592X"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guang Chen","raw_affiliation_strings":["Department of Computer Science and Technology, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063781430","display_name":"Alois Knoll","orcid":"https://orcid.org/0000-0003-4840-076X"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alois Knoll","raw_affiliation_strings":["School of Computation, Information and Technology, Technical University of Munich, Garching, Germany"],"affiliations":[{"raw_affiliation_string":"School of Computation, Information and Technology, Technical University of Munich, Garching, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5062677595"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":1.3901,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.83458582,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"20","issue":"9","first_page":"10706","last_page":"10716"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.9552000164985657,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/marl","display_name":"Marl","score":0.8315252661705017},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8136413097381592},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6495466828346252},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5407706499099731},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.5128672122955322},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.47098371386528015},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.42756128311157227},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36348623037338257},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3324090242385864},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32333338260650635},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.27601712942123413},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18186280131340027},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1197061836719513}],"concepts":[{"id":"https://openalex.org/C92927620","wikidata":"https://www.wikidata.org/wiki/Q184053","display_name":"Marl","level":3,"score":0.8315252661705017},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8136413097381592},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6495466828346252},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5407706499099731},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.5128672122955322},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.47098371386528015},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.42756128311157227},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36348623037338257},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3324090242385864},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32333338260650635},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.27601712942123413},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18186280131340027},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1197061836719513},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C109007969","wikidata":"https://www.wikidata.org/wiki/Q749565","display_name":"Structural basin","level":2,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tii.2024.3391934","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tii.2024.3391934","pdf_url":null,"source":{"id":"https://openalex.org/S184777250","display_name":"IEEE Transactions on Industrial Informatics","issn_l":"1551-3203","issn":["1551-3203","1941-0050"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Industrial Informatics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4699999988079071}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2150189917","https://openalex.org/W2153427071","https://openalex.org/W2736601468","https://openalex.org/W2762248135","https://openalex.org/W2787908307","https://openalex.org/W2948652605","https://openalex.org/W2991046523","https://openalex.org/W2991391803","https://openalex.org/W3094224934","https://openalex.org/W3099689767","https://openalex.org/W3100366369","https://openalex.org/W3101572197","https://openalex.org/W3138294267","https://openalex.org/W3173294282","https://openalex.org/W3201315469","https://openalex.org/W4225659902","https://openalex.org/W4281480521","https://openalex.org/W4292313830","https://openalex.org/W4299802797","https://openalex.org/W4327571609","https://openalex.org/W4388616010","https://openalex.org/W4390092221","https://openalex.org/W6638018090","https://openalex.org/W6680172340","https://openalex.org/W6737893269","https://openalex.org/W6738796088","https://openalex.org/W6741002519","https://openalex.org/W6756486208","https://openalex.org/W6771280675","https://openalex.org/W6780587392","https://openalex.org/W6785187516","https://openalex.org/W6785517317","https://openalex.org/W6788165329","https://openalex.org/W6789259823","https://openalex.org/W6791533262","https://openalex.org/W6802002411","https://openalex.org/W6838701874","https://openalex.org/W6839813222","https://openalex.org/W6839884968","https://openalex.org/W6860190918"],"related_works":["https://openalex.org/W2126019709","https://openalex.org/W1702901972","https://openalex.org/W4249798507","https://openalex.org/W2069775250","https://openalex.org/W2093541819","https://openalex.org/W2032294417","https://openalex.org/W2152754392","https://openalex.org/W1989172970","https://openalex.org/W2196316523","https://openalex.org/W1899363654"],"abstract_inverted_index":{"Due":[0],"to":[1,54,76,146],"a":[2,7,34,63,81,114],"lack":[3],"of":[4,10,45,89,99,150],"safety":[5,24,132,171],"considerations,":[6],"wide":[8],"range":[9],"multiagent":[11],"reinforcement":[12],"learning":[13,116],"(MARL)":[14],"applications":[15,47],"are":[16],"limited":[17],"in":[18,29,70,80],"real-world":[19,46,144],"environments.":[20],"Thus,":[21],"ensuring":[22],"MARL":[23,40,50,91,101,127],"is":[25,92,108],"essential":[26],"and":[27,42,110,140,143,170,173],"urgent":[28],"the":[30,38,43,78,86,95,137,148,154,166],"domain.":[31],"However,":[32],"merely":[33],"few":[35],"studies":[36],"consider":[37],"safe":[39,49,90,96,100,115,119],"problem,":[41],"investigation":[44],"using":[48],"algorithms":[51,75,102,128],"still":[52],"needs":[53],"be":[55,124],"improved.":[56],"To":[57],"fill":[58],"this":[59],"gap,":[60],"we":[61,72,111,135],"provide":[62],"framework":[64,117,122],"with":[65],"soft":[66,105],"constrained":[67,106],"policy":[68,97],"optimization,":[69],"which":[71],"develop":[73],"practical":[74],"address":[77],"problem":[79,87],"cooperative":[82],"game":[83],"setting.":[84],"First,":[85],"formulation":[88],"introduced.":[93],"Second,":[94],"optimization":[98,107],"based":[103],"on":[104],"analyzed,":[109],"further":[112],"propose":[113],"for":[118],"MARL.":[120],"The":[121],"can":[123],"plugged":[125],"into":[126],"without":[129],"manually":[130],"fine-tuning":[131],"bounds.":[133],"Third,":[134],"investigate":[136],"sim-to-real":[138],"problems,":[139],"conduct":[141],"simulation":[142],"experiments":[145],"evaluate":[147],"effectiveness":[149],"our":[151,160],"algorithms.":[152],"Finally,":[153],"comprehensive":[155],"experimental":[156],"results":[157],"indicate":[158],"that":[159],"method":[161],"has":[162],"significant":[163],"benefits":[164],"regarding":[165],"balance":[167],"between":[168],"reward":[169],"performance":[172],"outperforms":[174],"several":[175],"strong":[176],"baselines.":[177]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
