{"id":"https://openalex.org/W4402353287","doi":"https://doi.org/10.1109/ijcnn60899.2024.10649945","title":"Improving Zero-Shot Coordination with Diversely Rewarded Partner Agents","display_name":"Improving Zero-Shot Coordination with Diversely Rewarded Partner Agents","publication_year":2024,"publication_date":"2024-06-30","ids":{"openalex":"https://openalex.org/W4402353287","doi":"https://doi.org/10.1109/ijcnn60899.2024.10649945"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn60899.2024.10649945","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn60899.2024.10649945","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5105068598","display_name":"Peilin Wu","orcid":"https://orcid.org/0009-0008-9225-787X"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Peilin Wu","raw_affiliation_strings":["Southern University of Science and Technology,Guangdong Provincial Key Laboratory of Brain-Inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Guangdong Provincial Key Laboratory of Brain-Inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100647430","display_name":"Zhenhua Yang","orcid":"https://orcid.org/0000-0001-8205-7999"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Yang","raw_affiliation_strings":["Southern University of Science and Technology,Guangdong Provincial Key Laboratory of Brain-Inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Guangdong Provincial Key Laboratory of Brain-Inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5115621130","display_name":"Peng Yang","orcid":"https://orcid.org/0000-0003-4400-2289"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Yang","raw_affiliation_strings":["Southern University of Science and Technology,Guangdong Provincial Key Laboratory of Brain-Inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Guangdong Provincial Key Laboratory of Brain-Inspired Intelligent Computation,Department of Computer Science and Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5105068598"],"corresponding_institution_ids":["https://openalex.org/I3045169105"],"apc_list":null,"apc_paid":null,"fwci":0.7252,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.75956072,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11252","display_name":"Evolutionary Game Theory and Cooperation","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6116724014282227},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.604490339756012},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.602537214756012},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33206310868263245},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.10481783747673035}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6116724014282227},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.604490339756012},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.602537214756012},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33206310868263245},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.10481783747673035},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn60899.2024.10649945","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn60899.2024.10649945","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1606056663","https://openalex.org/W2041367235","https://openalex.org/W2296073425","https://openalex.org/W2736601468","https://openalex.org/W2913668833","https://openalex.org/W2980061931","https://openalex.org/W2991046523","https://openalex.org/W2997876358","https://openalex.org/W3169291081","https://openalex.org/W3175608395","https://openalex.org/W3181659843","https://openalex.org/W4286748781","https://openalex.org/W4286902222","https://openalex.org/W4287126489","https://openalex.org/W4287997992","https://openalex.org/W4287998029","https://openalex.org/W4291226485","https://openalex.org/W4302010773","https://openalex.org/W4375870051","https://openalex.org/W4379739787","https://openalex.org/W4382202823","https://openalex.org/W4382239238","https://openalex.org/W4385768174","https://openalex.org/W6741002519","https://openalex.org/W6748603076","https://openalex.org/W6767914801","https://openalex.org/W6770887232","https://openalex.org/W6771670691","https://openalex.org/W6771904302","https://openalex.org/W6772111244","https://openalex.org/W6774673717","https://openalex.org/W6784745743","https://openalex.org/W6796289742","https://openalex.org/W6797779983","https://openalex.org/W6798508043","https://openalex.org/W6802513241","https://openalex.org/W6804244202","https://openalex.org/W6810745647","https://openalex.org/W6811554491","https://openalex.org/W6839029903","https://openalex.org/W6840380725","https://openalex.org/W6842091875","https://openalex.org/W6846731017","https://openalex.org/W6849272965","https://openalex.org/W6849465841","https://openalex.org/W6853350048","https://openalex.org/W7028177604"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2366718574","https://openalex.org/W2359774528"],"abstract_inverted_index":{"Zero-shot":[0],"coordination":[1,8,21,45,194,214,234],"studies":[2,132],"the":[3,11,102,126,137,191,199,212,226,231],"training":[4,75,192],"of":[5,32,38,67,86,90,128,140,193,205,216,228],"well-generalizing":[6],"human-AI":[7,233],"agents":[9,34,53,95,106,207,217],"in":[10,225],"scenario":[12],"where":[13],"human":[14,39,68,99,115,141],"data":[15],"is":[16,203],"unavailable.":[17],"To":[18],"obtain":[19],"a":[20,30,44,64,71,88,157,166,176],"agent":[22,46],"generalize":[23],"to":[24,56,62,96,135,155,185],"unseen":[25,98],"humans,":[26],"prevailing":[27],"methods":[28,85,224],"generate":[29],"population":[31],"partner":[33,49,52,94,105],"as":[35,58,60],"proxy":[36],"models":[37],"partners":[40,221],"and":[41,76,121,149,172,180,189],"then":[42],"train":[43,156],"with":[47,208,219],"these":[48,131,220],"agents.":[50,195],"Constructed":[51],"are":[54,117,153,183],"expected":[55],"be":[57],"diverse":[59,93],"possible":[61],"cover":[63],"wide":[65],"range":[66],"behaviors,":[69],"preventing":[70],"distribution":[72],"shift":[73],"between":[74],"testing":[77],"stages.":[78],"Recent":[79],"works":[80],"concentrate":[81],"on":[82,162],"studying":[83],"effective":[84],"creating":[87],"group":[89],"high-reward":[91,104,173],"while":[92],"model":[97],"partners.":[100,142,174],"However,":[101],"resulting":[103],"do":[107],"not":[108,118],"accurately":[109],"reflect":[110],"real-world":[111],"situations,":[112],"considering":[113],"that":[114,198],"decisions":[116],"always":[119],"optimal":[120],"may":[122],"sometimes":[123],"even":[124],"hinder":[125],"progression":[127],"coordination.":[129],"Therefore,":[130],"still":[133],"struggle":[134],"capture":[136],"potential":[138],"characteristics":[139],"In":[143],"this":[144],"work,":[145],"reinforcement":[146],"learning":[147,151,182],"(RL)":[148],"supervised":[150],"(SL)":[152],"integrated":[154],"reward-conditioned":[158,167,201],"policy.":[159],"By":[160],"conditioned":[161],"different":[163,209],"desired":[164],"rewards,":[165],"policy":[168,202],"simulates":[169],"both":[170],"low-reward":[171],"Additionally,":[175],"reward-bucketed":[177],"replay":[178],"buffer":[179],"curriculum":[181],"applied":[184],"enhance":[186],"reward":[187],"diversity":[188],"boost":[190],"Experiments":[196],"demonstrate":[197],"proposed":[200],"capable":[204],"generating":[206],"rewards.":[210],"Moreover,":[211],"zero-shot":[213],"performance":[215],"trained":[218],"surpasses":[222],"previous":[223],"majority":[227],"scenarios":[229],"within":[230],"Overcooked":[232],"benchmark.":[235]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
