{"id":"https://openalex.org/W4224932507","doi":"https://doi.org/10.1109/icassp43922.2022.9747378","title":"Competitive Multi-Agent Reinforcement Learning with Self-Supervised Representation","display_name":"Competitive Multi-Agent Reinforcement Learning with Self-Supervised Representation","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4224932507","doi":"https://doi.org/10.1109/icassp43922.2022.9747378"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747378","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747378","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041890767","display_name":"DiJia Su","orcid":"https://orcid.org/0000-0001-9369-4715"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"DiJia Su","raw_affiliation_strings":["Princeton University"],"affiliations":[{"raw_affiliation_string":"Princeton University","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059740024","display_name":"Jason D. Lee","orcid":"https://orcid.org/0000-0003-0064-7800"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason D. Lee","raw_affiliation_strings":["Princeton University"],"affiliations":[{"raw_affiliation_string":"Princeton University","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076430013","display_name":"John M. Mulvey","orcid":"https://orcid.org/0000-0002-4290-0870"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John M. Mulvey","raw_affiliation_strings":["Princeton University"],"affiliations":[{"raw_affiliation_string":"Princeton University","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042307561","display_name":"H. Vincent Poor","orcid":"https://orcid.org/0000-0002-2062-131X"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"H. Vincent Poor","raw_affiliation_strings":["Princeton University"],"affiliations":[{"raw_affiliation_string":"Princeton University","institution_ids":["https://openalex.org/I20089843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5041890767"],"corresponding_institution_ids":["https://openalex.org/I20089843"],"apc_list":null,"apc_paid":null,"fwci":0.1039,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.23545572,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"1998","issue":null,"first_page":"4098","last_page":"4102"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9861999750137329,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8837260007858276},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8006137609481812},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7785820960998535},{"id":"https://openalex.org/keywords/adversary","display_name":"Adversary","score":0.7185020446777344},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7175406217575073},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7059453725814819},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6237932443618774},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5115073919296265},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.4461027979850769},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.07642826437950134},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06871011853218079}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8837260007858276},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8006137609481812},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7785820960998535},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.7185020446777344},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7175406217575073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7059453725814819},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6237932443618774},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5115073919296265},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4461027979850769},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.07642826437950134},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06871011853218079},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747378","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747378","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1542941925","https://openalex.org/W2104602264","https://openalex.org/W2145339207","https://openalex.org/W2475089067","https://openalex.org/W2616247523","https://openalex.org/W2736601468","https://openalex.org/W2749928749","https://openalex.org/W2766447205","https://openalex.org/W2803926500","https://openalex.org/W2807741983","https://openalex.org/W2912793366","https://openalex.org/W2920362155","https://openalex.org/W2949464762","https://openalex.org/W2950872548","https://openalex.org/W2963095800","https://openalex.org/W2963420272","https://openalex.org/W2964251366","https://openalex.org/W3021708257","https://openalex.org/W3034806614","https://openalex.org/W3090386093","https://openalex.org/W3102564565","https://openalex.org/W3130091361","https://openalex.org/W3134226813","https://openalex.org/W3211536067","https://openalex.org/W4295150809","https://openalex.org/W4297797010","https://openalex.org/W4297808394","https://openalex.org/W6675811377","https://openalex.org/W6704369950","https://openalex.org/W6721101288","https://openalex.org/W6729556111","https://openalex.org/W6730111887","https://openalex.org/W6741002519","https://openalex.org/W6743802245","https://openalex.org/W6744562401","https://openalex.org/W6748203849","https://openalex.org/W6748638692","https://openalex.org/W6748910126","https://openalex.org/W6751912414","https://openalex.org/W6752380930","https://openalex.org/W6759194994","https://openalex.org/W6760405395","https://openalex.org/W6763484891","https://openalex.org/W6773507897","https://openalex.org/W6776867236","https://openalex.org/W6781476637","https://openalex.org/W6783838878","https://openalex.org/W6790140328","https://openalex.org/W6791533262","https://openalex.org/W6844194202"],"related_works":["https://openalex.org/W4289543317","https://openalex.org/W4285070106","https://openalex.org/W4319083788","https://openalex.org/W4224932507","https://openalex.org/W3022038857","https://openalex.org/W4286899287","https://openalex.org/W4360764167","https://openalex.org/W4318621078","https://openalex.org/W3095449511","https://openalex.org/W3141495010"],"abstract_inverted_index":{"We":[0],"present":[1],"MASRL:":[2],"Competitive":[3],"Multi-Agent":[4,98],"Self-supervised":[5],"representations":[6],"for":[7,70],"Reinforcement":[8],"Learning":[9],"in":[10],"the":[11,33,57,63,82,97],"multi-agent":[12],"competitive":[13],"environment.":[14],"MASRL":[15,61,101],"introduces":[16],"a":[17,24,36],"simple":[18],"but":[19,48],"effective":[20,68],"self-supervised":[21],"task:":[22],"predicting":[23],"learning":[25,64],"agent\u2019s":[26],"opponent\u2019s":[27,58],"future":[28,59],"move.":[29],"In":[30],"doing":[31],"this,":[32],"agent":[34,65,83],"learns":[35],"stronger":[37],"representation":[38],"from":[39],"this":[40],"additional":[41],"signal,":[42],"focusing":[43],"not":[44],"only":[45],"on":[46,50],"itself":[47],"also":[49],"its":[51,88],"opponent.":[52],"By":[53],"understanding":[54],"and":[55,80,86],"anticipating":[56],"moves,":[60],"allows":[62,81],"to":[66,84,91],"develop":[67],"strategies":[69],"opponent":[71],"exploitation.":[72],"Our":[73],"method":[74],"stabilizes":[75],"training,":[76],"improves":[77],"sample":[78],"efficiency,":[79],"generalize":[85],"adapt":[87],"playing":[89],"strategy":[90],"other":[92,106],"unseen":[93],"expert":[94],"opponents.":[95],"On":[96],"Atari":[99],"benchmark,":[100],"achieves":[102],"remarkable":[103],"performance,":[104],"outperforming":[105],"strong":[107],"baselines.":[108],"Examples":[109],"of":[110],"demo":[111],"videos":[112],"can":[113],"be":[114],"found":[115],"at:":[116],"https://sites.google.com/view/compmarl":[117]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
