{"id":"https://openalex.org/W7129011864","doi":"https://doi.org/10.1109/tpami.2026.3664855","title":"A Generic Competitive-Cooperative Actor-Critic Framework for Deep Reinforcement Learning","display_name":"A Generic Competitive-Cooperative Actor-Critic Framework for Deep Reinforcement Learning","publication_year":2026,"publication_date":"2026-02-16","ids":{"openalex":"https://openalex.org/W7129011864","doi":"https://doi.org/10.1109/tpami.2026.3664855","pmid":"https://pubmed.ncbi.nlm.nih.gov/41697803"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2026.3664855","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2026.3664855","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Meng Xu","orcid":"https://orcid.org/0000-0003-4857-5439"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Meng Xu","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0003-4857-5439","affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101700401","display_name":"Zihao Wen","orcid":"https://orcid.org/0000-0002-5401-5393"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Zihao Wen","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-5401-5393","affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071933192","display_name":"Xinhong Chen","orcid":"https://orcid.org/0000-0002-8563-148X"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xinhong Chen","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-8563-148X","affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100628454","display_name":"Guanyi Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Guanyi Zhao","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126145220","display_name":"Jin Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Huang","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-8774-2936","affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126094207","display_name":"Jianping Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jianping Wang","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-9318-1482","affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20587459,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"48","issue":"6","first_page":"7112","last_page":"7128"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8658999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8658999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10784","display_name":"Muscle activation and electromyography studies","score":0.008500000461935997,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.007699999958276749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8273000121116638},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.5690000057220459},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.551800012588501},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4372999966144562},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.42730000615119934},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.42239999771118164}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8273000121116638},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7322999835014343},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.651199996471405},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.5690000057220459},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.551800012588501},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5083000063896179},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4372999966144562},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.42730000615119934},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42239999771118164},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.3801000118255615},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.3424000144004822},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.2824999988079071},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2635999917984009}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2026.3664855","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2026.3664855","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:41697803","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41697803","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W2145339207","https://openalex.org/W3046814132","https://openalex.org/W3088798727","https://openalex.org/W3091795298","https://openalex.org/W3091958244","https://openalex.org/W3127177108","https://openalex.org/W3127561923","https://openalex.org/W3170872007","https://openalex.org/W3199711351","https://openalex.org/W3202264206","https://openalex.org/W3205598877","https://openalex.org/W4280508537","https://openalex.org/W4285009743","https://openalex.org/W4291393104","https://openalex.org/W4308211465","https://openalex.org/W4313059722","https://openalex.org/W4313362050","https://openalex.org/W4379620511","https://openalex.org/W4384644208","https://openalex.org/W4385286429","https://openalex.org/W4385566404","https://openalex.org/W4385737557","https://openalex.org/W4385863710","https://openalex.org/W4387789518","https://openalex.org/W4391131278","https://openalex.org/W4391697075","https://openalex.org/W4391912934","https://openalex.org/W4391974559","https://openalex.org/W4392347372","https://openalex.org/W4393159991","https://openalex.org/W4396506409","https://openalex.org/W4396605204","https://openalex.org/W4399541205","https://openalex.org/W4399665988","https://openalex.org/W4399767971","https://openalex.org/W4401452744","https://openalex.org/W4403923016","https://openalex.org/W4405785491","https://openalex.org/W4407638197","https://openalex.org/W4409659344","https://openalex.org/W4411688074","https://openalex.org/W4413155594","https://openalex.org/W4417248875","https://openalex.org/W7124313229","https://openalex.org/W7133236636"],"related_works":[],"abstract_inverted_index":{"In":[0],"the":[1,13,56,93,102,108,124,130,136,142,152],"field":[2],"of":[3,15,32,141,160],"Deep":[4],"reinforcement":[5],"learning":[6,60,91],"(DRL),":[7],"enhancing":[8],"exploration":[9,40],"capabilities":[10],"and":[11,41,61,110,163,204],"improving":[12],"accuracy":[14],"Q-value":[16,42],"estimation":[17,150],"remain":[18],"two":[19,157],"major":[20],"challenges.":[21],"Recently,":[22],"double-actor":[23,46,85,168,194],"DRL":[24,33,47,86,169,173,190,195],"methods":[25,48,87,170],"have":[26],"emerged":[27],"as":[28,114,139,200],"a":[29,75,115],"promising":[30],"class":[31],"approaches,":[34],"achieving":[35],"substantial":[36],"advancements":[37],"in":[38,54,104,132,148],"both":[39],"estimation.":[43],"However,":[44],"existing":[45,84],"feature":[49],"actors":[50,94,109],"that":[51,78,182],"operate":[52],"independently":[53],"exploring":[55],"environment,":[57],"lacking":[58],"mutual":[59,90,121],"collaboration,":[62],"which":[63],"leads":[64],"to":[65,95,119,171,175],"suboptimal":[66],"policies.":[67,98],"To":[68],"address":[69],"this":[70,72,112],"challenge,":[71],"work":[73],"proposes":[74],"generic":[76],"solution":[77],"can":[79],"be":[80],"seamlessly":[81],"integrated":[82],"into":[83],"by":[88,107,135,202],"promoting":[89],"among":[92,123],"develop":[96],"improved":[97],"Specifically,":[99],"we":[100,127],"calculate":[101],"difference":[103,113],"actions":[105],"output":[106,134],"minimize":[111,129],"loss":[116],"during":[117],"training":[118],"facilitate":[120],"imitation":[122],"actors.":[125],"Simultaneously,":[126],"also":[128],"differences":[131],"Q-values":[133],"various":[137],"critics":[138],"part":[140],"loss,":[143],"thereby":[144],"avoiding":[145],"significant":[146],"discrepancies":[147],"value":[149],"for":[151],"imitated":[153],"actions.":[154],"We":[155],"present":[156],"specific":[158],"implementations":[159,166],"our":[161,183],"method":[162,184],"extend":[164],"these":[165],"beyond":[167],"other":[172,205],"approaches":[174],"encourage":[176],"broader":[177],"adoption.":[178],"Experimental":[179],"results":[180],"demonstrate":[181],"significantly":[185],"improves":[186],"twenty":[187],"state-of-the-art":[188],"(SOTA)":[189],"methods,":[191,196],"including":[192],"SOTA":[193],"across":[197],"eleven":[198],"tasks,":[199],"measured":[201],"return":[203],"metrics.":[206]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-17T00:00:00"}
