{"id":"https://openalex.org/W4410428272","doi":"https://doi.org/10.1109/tcds.2025.3570497","title":"Hybrid Actor\u2013Critic for Physically Heterogeneous Multiagent Reinforcement Learning","display_name":"Hybrid Actor\u2013Critic for Physically Heterogeneous Multiagent Reinforcement Learning","publication_year":2025,"publication_date":"2025-05-16","ids":{"openalex":"https://openalex.org/W4410428272","doi":"https://doi.org/10.1109/tcds.2025.3570497"},"language":"en","primary_location":{"id":"doi:10.1109/tcds.2025.3570497","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcds.2025.3570497","pdf_url":null,"source":{"id":"https://openalex.org/S2488537894","display_name":"IEEE Transactions on Cognitive and Developmental Systems","issn_l":"2379-8920","issn":["2379-8920","2379-8939"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cognitive and Developmental Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tianyi Hu","orcid":"https://orcid.org/0009-0003-2570-2287"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianyi Hu","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-2570-2287","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060319097","display_name":"Zhiqiang Pu","orcid":"https://orcid.org/0000-0002-4841-4048"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiqiang Pu","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-4841-4048","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047050212","display_name":"Xiaolin Ai","orcid":"https://orcid.org/0000-0001-7943-8336"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaolin Ai","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7943-8336","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043281824","display_name":"Tenghai Qiu","orcid":"https://orcid.org/0000-0002-0312-5728"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tenghai Qiu","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-0312-5728","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062981171","display_name":"Yanyan Liang","orcid":"https://orcid.org/0000-0002-5780-8540"},"institutions":[{"id":"https://openalex.org/I111950717","display_name":"Macau University of Science and Technology","ror":"https://ror.org/03jqs2n27","country_code":"MO","type":"education","lineage":["https://openalex.org/I111950717","https://openalex.org/I4391767947"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Yanyan Liang","raw_affiliation_strings":["Macau University of Science and Technology, Taipa, Macau"],"raw_orcid":"https://orcid.org/0000-0002-5780-8540","affiliations":[{"raw_affiliation_string":"Macau University of Science and Technology, Taipa, Macau","institution_ids":["https://openalex.org/I111950717"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019541846","display_name":"Jianqiang Yi","orcid":"https://orcid.org/0000-0003-3268-9482"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianqiang Yi","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3268-9482","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879"],"apc_list":null,"apc_paid":null,"fwci":2.1377,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.88258889,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"17","issue":"6","first_page":"1520","last_page":"1535"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.4235000014305115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.4235000014305115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.4059999883174896,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11883","display_name":"Embodied and Extended Cognition","score":0.39980000257492065,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.841341495513916},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8091555833816528},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.5157495141029358},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4253501892089844}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.841341495513916},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8091555833816528},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5157495141029358},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4253501892089844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcds.2025.3570497","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcds.2025.3570497","pdf_url":null,"source":{"id":"https://openalex.org/S2488537894","display_name":"IEEE Transactions on Cognitive and Developmental Systems","issn_l":"2379-8920","issn":["2379-8920","2379-8939"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cognitive and Developmental Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.46000000834465027}],"awards":[{"id":"https://openalex.org/G1552939569","display_name":null,"funder_award_id":"20220484077","funder_id":"https://openalex.org/F4320334978","funder_display_name":"Beijing Nova Program"},{"id":"https://openalex.org/G5354033369","display_name":null,"funder_award_id":"20230484435","funder_id":"https://openalex.org/F4320334978","funder_display_name":"Beijing Nova Program"},{"id":"https://openalex.org/G6672390475","display_name":null,"funder_award_id":"62322316","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334978","display_name":"Beijing Nova Program","ror":"https://ror.org/034k14f91"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1992935239","https://openalex.org/W2025406794","https://openalex.org/W2602275733","https://openalex.org/W2747213132","https://openalex.org/W2936516903","https://openalex.org/W2951360122","https://openalex.org/W2968526727","https://openalex.org/W2982316857","https://openalex.org/W2997502221","https://openalex.org/W3039502116","https://openalex.org/W3088335680","https://openalex.org/W3111843786","https://openalex.org/W3173294282","https://openalex.org/W3176265013","https://openalex.org/W4296471407","https://openalex.org/W4323030862","https://openalex.org/W4388201183","https://openalex.org/W4390120052","https://openalex.org/W4395030794","https://openalex.org/W4398226357","https://openalex.org/W4410090343","https://openalex.org/W6922480057"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4306904969","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2138720691","https://openalex.org/W2376932109"],"abstract_inverted_index":{"This":[0],"paper":[1],"focuses":[2],"on":[3],"cooperative":[4],"policy":[5,50,86],"learning":[6,87,200],"for":[7,43,84],"physically":[8],"heterogeneous":[9,49,79,160],"multi-agent":[10],"system":[11],"(PHet-MAS),":[12],"where":[13],"agents":[14,53],"have":[15],"different":[16],"observation":[17],"spaces,":[18],"action":[19],"spaces":[20],"and":[21,62,99,107,134,176,195,202],"local":[22],"state":[23],"transitions.":[24],"Due":[25],"to":[26,38,119,151,170],"the":[27,56,64,71,85,120,127,136,142,153],"various":[28],"input-output":[29],"structures":[30],"of":[31,59,66,88,92,95,124,179],"agents\u2019":[32],"policies":[33],"in":[34,173,199],"PHet-MAS,":[35],"it\u2019s":[36],"difficult":[37],"employ":[39],"parameter":[40],"sharing":[41],"techniques":[42],"sample":[44,174],"efficiency.":[45],"Moreover,":[46],"a":[47,82,96,100,145,163],"totally":[48],"design":[51],"impedes":[52],"from":[54],"utilizing":[55],"training":[57,166,177],"experience":[58],"their":[60],"companions,":[61],"increases":[63],"risk":[65],"environmental":[67,154],"non-stationarity.":[68],"To":[69],"address":[70,171],"above":[72],"issues,":[73],"we":[74],"propose":[75],"<italic":[76],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[77],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">hybrid":[78],"actor-critic</i>":[80],"(HHAC),":[81],"method":[83,167],"PHet-MAS.":[89],"The":[90,111],"framework":[91],"HHAC":[93,187],"consists":[94],"hybrid":[97,101,143,164,180],"actor":[98],"critic,":[102,144],"both":[103],"containing":[104],"globally":[105,128],"shared":[106,109,113,129],"locally":[108,112],"modules.":[110],"modules":[114,130],"can":[115,131,196],"be":[116],"customized":[117],"according":[118],"actual":[121],"physical":[122],"properties":[123],"agents,":[125],"while":[126],"help":[132],"extract":[133],"utilize":[135],"common":[137],"information":[138],"among":[139],"agents.":[140],"In":[141],"behavioral":[146],"intention":[147],"module":[148],"is":[149,168],"designed":[150],"alleviate":[152],"non-stationary":[155],"issue":[156],"caused":[157],"by":[158,184],"evolving":[159],"policies.":[161,204],"Finally,":[162],"network":[165],"developed":[169],"challenges":[172],"construction":[175],"stability":[178],"networks.":[181],"As":[182],"evidenced":[183],"experimental":[185],"results,":[186],"exhibits":[188],"superior":[189],"performance":[190],"enhancements":[191],"over":[192],"baseline":[193],"approaches,":[194],"facilitate":[197],"PHet-MAS":[198],"sophisticated":[201],"instructive":[203]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2025-10-10T00:00:00"}
