{"id":"https://openalex.org/W4391020367","doi":"https://doi.org/10.1109/cdc49753.2023.10383458","title":"Scalable Robust Multi-Agent Reinforcement Learning for Model Uncertainty","display_name":"Scalable Robust Multi-Agent Reinforcement Learning for Model Uncertainty","publication_year":2023,"publication_date":"2023-12-13","ids":{"openalex":"https://openalex.org/W4391020367","doi":"https://doi.org/10.1109/cdc49753.2023.10383458"},"language":"en","primary_location":{"id":"doi:10.1109/cdc49753.2023.10383458","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/cdc49753.2023.10383458","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 62nd IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090738504","display_name":"Younkyung Jwa","orcid":"https://orcid.org/0000-0001-8253-9322"},"institutions":[{"id":"https://openalex.org/I39534123","display_name":"Gwangju Institute of Science and Technology","ror":"https://ror.org/024kbgz78","country_code":"KR","type":"education","lineage":["https://openalex.org/I39534123"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Younkyung Jwa","raw_affiliation_strings":["School of Artificial Intelligence, Gwangju Institute of Science and Technology (GIST),Gwangju,Korea","School of Artificial Intelligence, Gwangju Institute of Science and Technology (GIST), Gwangju, Korea"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Gwangju Institute of Science and Technology (GIST),Gwangju,Korea","institution_ids":["https://openalex.org/I39534123"]},{"raw_affiliation_string":"School of Artificial Intelligence, Gwangju Institute of Science and Technology (GIST), Gwangju, Korea","institution_ids":["https://openalex.org/I39534123"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011306979","display_name":"Minseon Gwak","orcid":null},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Minseon Gwak","raw_affiliation_strings":["Pohang University of Science and Technology (POSTECH),Department of Electrical Engineering,Pohang,Korea","Department of Electrical Engineering, Pohang University of Science and Technology (POSTECH), Pohang, Korea"],"affiliations":[{"raw_affiliation_string":"Pohang University of Science and Technology (POSTECH),Department of Electrical Engineering,Pohang,Korea","institution_ids":["https://openalex.org/I123900574"]},{"raw_affiliation_string":"Department of Electrical Engineering, Pohang University of Science and Technology (POSTECH), Pohang, Korea","institution_ids":["https://openalex.org/I123900574"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083962111","display_name":"Jiin Kwak","orcid":null},"institutions":[{"id":"https://openalex.org/I48566637","display_name":"Ulsan National Institute of Science and Technology","ror":"https://ror.org/017cjz748","country_code":"KR","type":"education","lineage":["https://openalex.org/I48566637"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jiin Kwak","raw_affiliation_strings":["Artificial Intelligence Graduate School, Ulsan National Institute of Science and Technology (UNIST),Ulsan,Korea","Artificial Intelligence Graduate School, Ulsan National Institute of Science and Technology (UNIST), Ulsan, Korea"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Graduate School, Ulsan National Institute of Science and Technology (UNIST),Ulsan,Korea","institution_ids":["https://openalex.org/I48566637"]},{"raw_affiliation_string":"Artificial Intelligence Graduate School, Ulsan National Institute of Science and Technology (UNIST), Ulsan, Korea","institution_ids":["https://openalex.org/I48566637"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028715246","display_name":"Chang Wook Ahn","orcid":"https://orcid.org/0000-0002-9902-5966"},"institutions":[{"id":"https://openalex.org/I39534123","display_name":"Gwangju Institute of Science and Technology","ror":"https://ror.org/024kbgz78","country_code":"KR","type":"education","lineage":["https://openalex.org/I39534123"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Chang Wook Ahn","raw_affiliation_strings":["School of Artificial Intelligence, Gwangju Institute of Science and Technology (GIST),Gwangju,Korea","School of Artificial Intelligence, Gwangju Institute of Science and Technology (GIST), Gwangju, Korea"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Gwangju Institute of Science and Technology (GIST),Gwangju,Korea","institution_ids":["https://openalex.org/I39534123"]},{"raw_affiliation_string":"School of Artificial Intelligence, Gwangju Institute of Science and Technology (GIST), Gwangju, Korea","institution_ids":["https://openalex.org/I39534123"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088952968","display_name":"PooGyeon Park","orcid":"https://orcid.org/0000-0002-8249-5427"},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"PooGyeon Park","raw_affiliation_strings":["Pohang University of Science and Technology (POSTECH),Department of Electrical Engineering,Pohang,Korea","Department of Electrical Engineering, Pohang University of Science and Technology (POSTECH), Pohang, Korea"],"affiliations":[{"raw_affiliation_string":"Pohang University of Science and Technology (POSTECH),Department of Electrical Engineering,Pohang,Korea","institution_ids":["https://openalex.org/I123900574"]},{"raw_affiliation_string":"Department of Electrical Engineering, Pohang University of Science and Technology (POSTECH), Pohang, Korea","institution_ids":["https://openalex.org/I123900574"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5090738504"],"corresponding_institution_ids":["https://openalex.org/I39534123"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19135389,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"33","issue":null,"first_page":"3402","last_page":"3407"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9524999856948853,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9523000121116638,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8324175477027893},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7117505073547363},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.699539303779602},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6358592510223389},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5700553059577942},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.5555115342140198},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.46775317192077637},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46176308393478394},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.44358205795288086},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4185546338558197},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3825439214706421},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1676928997039795}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8324175477027893},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7117505073547363},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.699539303779602},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6358592510223389},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5700553059577942},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.5555115342140198},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.46775317192077637},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46176308393478394},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.44358205795288086},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4185546338558197},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3825439214706421},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1676928997039795},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cdc49753.2023.10383458","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/cdc49753.2023.10383458","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 62nd IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5199999809265137}],"awards":[{"id":"https://openalex.org/G4399222365","display_name":null,"funder_award_id":"2020R1A2C2005709","funder_id":"https://openalex.org/F4320322030","funder_display_name":"Ministry of Science, ICT and Future Planning"},{"id":"https://openalex.org/G8390078958","display_name":null,"funder_award_id":"2020R1A2C2005709","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"}],"funders":[{"id":"https://openalex.org/F4320322030","display_name":"Ministry of Science, ICT and Future Planning","ror":"https://ror.org/032e49973"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2486811804","https://openalex.org/W2602275733","https://openalex.org/W2785529341","https://openalex.org/W2904455790","https://openalex.org/W2976036462","https://openalex.org/W3012934742","https://openalex.org/W3030840723","https://openalex.org/W3035141911","https://openalex.org/W3091492359","https://openalex.org/W3098237412","https://openalex.org/W3100191104","https://openalex.org/W3101572197","https://openalex.org/W3189694631","https://openalex.org/W3205685096","https://openalex.org/W3214274533","https://openalex.org/W4220807181","https://openalex.org/W4281992443","https://openalex.org/W4298140820","https://openalex.org/W4299802797","https://openalex.org/W4315488732","https://openalex.org/W4318479386","https://openalex.org/W6735677848","https://openalex.org/W6738796088","https://openalex.org/W6759312646","https://openalex.org/W6771232374","https://openalex.org/W6771904302","https://openalex.org/W6774966973","https://openalex.org/W6779279235","https://openalex.org/W6785517317","https://openalex.org/W6802038118"],"related_works":["https://openalex.org/W4400868993","https://openalex.org/W3096874164","https://openalex.org/W1985560493","https://openalex.org/W2937181779","https://openalex.org/W2386410636","https://openalex.org/W2357975469","https://openalex.org/W2145363145","https://openalex.org/W1626977535","https://openalex.org/W2341346307","https://openalex.org/W3168977894"],"abstract_inverted_index":{"A":[0],"robust":[1,20,46,70,83,155],"multi-agent":[2,72],"reinforcement":[3],"learning":[4],"(MARL)":[5],"algorithm":[6,50],"using":[7],"a":[8,19,25,33,69,87,110],"nature":[9,162],"actor":[10],"has":[11],"been":[12],"shown":[13],"to":[14,106,135,152],"be":[15],"effective":[16],"in":[17,53],"finding":[18],"Nash":[21],"equilibrium":[22],"(NE)":[23],"of":[24,48,113,144,171,180,184],"Markov":[26,156],"game":[27,157],"with":[28,55,68,118,161,164],"model":[29,185],"uncertainty.":[30,186],"However,":[31],"since":[32],"game-size":[34],"scaling":[35],"increases":[36],"the":[37,43,45,49,93,96,107,123,142,145,148,154,172],"search":[38,85,89],"space":[39,90],"and":[40,102,131,140,169,182],"challenges":[41],"reaching":[42],"NE,":[44],"property":[47],"is":[51,116,150],"reduced":[52],"environments":[54],"many":[56],"agents.":[57],"This":[58],"paper":[59],"proposes":[60],"an":[61,81],"evolutionary":[62],"diversity-maintaining":[63],"population":[64,111],"curriculum":[65],"(EDPC)":[66],"framework":[67],"attention-based":[71,165],"deep":[73],"deterministic":[74],"policy":[75],"gradient":[76],"(RA-MADDPG)":[77],"algorithm,":[78],"which":[79],"enables":[80],"efficient":[82],"NE":[84],"by":[86],"structured":[88],"expansion.":[91],"In":[92],"EDPC":[94],"framework,":[95],"MARL":[97],"divides":[98],"into":[99],"several":[100],"stages,":[101],"when":[103],"moving":[104],"on":[105],"next":[108],"stage,":[109],"consisting":[112],"larger":[114],"games":[115,121],"made":[117],"two":[119],"parent":[120,129],"from":[122],"previous":[124],"stage.":[125],"We":[126],"introduce":[127],"reward-proportionate":[128],"selection":[130],"reward-guided":[132],"mutation":[133],"methods":[134],"continue":[136],"reinforcing":[137],"superior":[138],"agents":[139,181],"maintain":[141],"diversity":[143],"population.":[146],"Furthermore,":[147],"RA-MADDPG":[149],"used":[151],"solve":[153],"at":[158],"each":[159],"stage":[160],"actors":[163],"architectures.":[166],"The":[167],"scalability":[168],"robustness":[170],"proposed":[173],"method":[174],"are":[175],"evaluated":[176],"for":[177],"different":[178],"numbers":[179],"levels":[183]},"counts_by_year":[],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
