{"id":"https://openalex.org/W4385484771","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191476","title":"Explicitly Learning Policy Under Partial Observability in Multiagent Reinforcement Learning","display_name":"Explicitly Learning Policy Under Partial Observability in Multiagent Reinforcement Learning","publication_year":2023,"publication_date":"2023-06-18","ids":{"openalex":"https://openalex.org/W4385484771","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191476"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn54540.2023.10191476","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10191476","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100350503","display_name":"Yang Chen","orcid":"https://orcid.org/0000-0003-4749-3060"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chen Yang","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences","Chinese Academy of Sciences, Institute of Automation, Beijing, P.R. China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Automation, Beijing, P.R. China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088679980","display_name":"Guangkai Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangkai Yang","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences","Chinese Academy of Sciences, Institute of Automation, Beijing, P.R. China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Automation, Beijing, P.R. China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100353673","display_name":"Hao Chen","orcid":"https://orcid.org/0009-0001-6480-7976"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Chen","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109012644","display_name":"Junge Zhang","orcid":"https://orcid.org/0000-0002-9970-394X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junge Zhang","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences","Chinese Academy of Sciences, Institute of Automation, Beijing, P.R. China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Automation, Beijing, P.R. China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100350503"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210112150","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.348,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.65171717,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/observability","display_name":"Observability","score":0.9904084205627441},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7331375479698181},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6245203018188477},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.601526141166687},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4697689712047577},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.41460418701171875},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35919755697250366},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3435176610946655},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21388041973114014},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18994027376174927},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.10231679677963257}],"concepts":[{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.9904084205627441},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7331375479698181},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6245203018188477},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.601526141166687},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4697689712047577},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.41460418701171875},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35919755697250366},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3435176610946655},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21388041973114014},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18994027376174927},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.10231679677963257},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn54540.2023.10191476","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10191476","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G290393051","display_name":null,"funder_award_id":"JCPYJJ-22017","funder_id":"https://openalex.org/F4320332238","funder_display_name":"Center for Africana Studies, Johns Hopkins University"},{"id":"https://openalex.org/G7971092310","display_name":null,"funder_award_id":"QYZDB-SSW-JSC006","funder_id":"https://openalex.org/F4320321133","funder_display_name":"Chinese Academy of Sciences"},{"id":"https://openalex.org/G852446924","display_name":null,"funder_award_id":"61876181","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"},{"id":"https://openalex.org/F4320332238","display_name":"Center for Africana Studies, Johns Hopkins University","ror":"https://ror.org/00za53h95"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1522301498","https://openalex.org/W1598140581","https://openalex.org/W2012812921","https://openalex.org/W2099873296","https://openalex.org/W2134797427","https://openalex.org/W2145339207","https://openalex.org/W2155027007","https://openalex.org/W2166533447","https://openalex.org/W2292533394","https://openalex.org/W2407386500","https://openalex.org/W2546571074","https://openalex.org/W2588790649","https://openalex.org/W2617547828","https://openalex.org/W2626637010","https://openalex.org/W2747213132","https://openalex.org/W2749807327","https://openalex.org/W2903910345","https://openalex.org/W2963573053","https://openalex.org/W2970272688","https://openalex.org/W2991046523","https://openalex.org/W2996037775","https://openalex.org/W3003498399","https://openalex.org/W3004640943","https://openalex.org/W3046288222","https://openalex.org/W3094349299","https://openalex.org/W3176812291","https://openalex.org/W4214717370","https://openalex.org/W4287755265","https://openalex.org/W4288090372","https://openalex.org/W4288594419","https://openalex.org/W4295364049","https://openalex.org/W4298857966","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6637967152","https://openalex.org/W6674711614","https://openalex.org/W6677939520","https://openalex.org/W6679909955","https://openalex.org/W6683204974","https://openalex.org/W6719700797","https://openalex.org/W6728994542","https://openalex.org/W6732837357","https://openalex.org/W6739516088","https://openalex.org/W6743756900","https://openalex.org/W6749304979","https://openalex.org/W6755920956","https://openalex.org/W6758846586","https://openalex.org/W6762491519","https://openalex.org/W6767151588","https://openalex.org/W6768842719","https://openalex.org/W6772005887","https://openalex.org/W6773620346","https://openalex.org/W6781750019","https://openalex.org/W6784046417","https://openalex.org/W6784152626","https://openalex.org/W6864424756"],"related_works":["https://openalex.org/W2046459260","https://openalex.org/W2765830098","https://openalex.org/W2967463586","https://openalex.org/W2074679142","https://openalex.org/W1971989957","https://openalex.org/W2517338020","https://openalex.org/W3157641275","https://openalex.org/W4312300846","https://openalex.org/W2104042711","https://openalex.org/W4206221578"],"abstract_inverted_index":{"We":[0],"explore":[1],"explicit":[2],"solutions":[3],"for":[4,117,145],"multiagent":[5],"reinforcement":[6],"learning":[7],"(MARL)":[8],"under":[9,85,114],"the":[10,63,69,134,166],"constraint":[11],"of":[12,19,65,71,79,136,168],"partial":[13,30,46,66,86,115,172],"observability.":[14,87],"With":[15],"a":[16,91,101,126],"general":[17,128],"framework":[18],"centralized":[20,37],"training":[21,131],"with":[22],"decentralized":[23],"execution":[24],"(CTDE),":[25],"existing":[26,143],"methods":[27,144],"implicitly":[28],"alleviate":[29],"observability":[31,47,67,116,173],"by":[32],"introducing":[33],"global":[34],"information":[35],"during":[36],"training.":[38],"However,":[39],"such":[40],"implicit":[41],"solution":[42],"cannot":[43],"well":[44],"address":[45],"and":[48,73,105,129,133,174],"shows":[49],"low":[50],"sample":[51,176],"efficiency":[52],"in":[53,170],"many":[54],"MARL":[55,83],"problems.":[56],"In":[57],"this":[58],"paper,":[59],"we":[60,89],"focus":[61],"on":[62,68,151],"influence":[64],"policy":[70,80,113],"agents,":[72],"formally":[74],"derive":[75],"an":[76],"ideal":[77],"form":[78],"that":[81,157],"maximizes":[82],"objective":[84],"Furthermore,":[88],"develop":[90],"new":[92],"method":[93],"named":[94],"Explicitly":[95],"Learning":[96],"Policy":[97],"(ELP),":[98],"which":[99,164],"adopts":[100],"novel":[102],"teacher-student":[103],"structure":[104],"utilizes":[106],"knowledge":[107],"distillation":[108],"to":[109,121,142],"explicitly":[110],"learn":[111],"individual":[112],"each":[118],"agent.":[119],"Compared":[120],"prior":[122],"methods,":[123],"ELP":[124,137,158,169],"presents":[125],"more":[127],"interpretable":[130],"process,":[132],"procedure":[135],"can":[138],"be":[139],"easily":[140],"extended":[141],"performance":[146],"boost.":[147],"Our":[148],"empirical":[149],"experiments":[150],"StarCraft":[152],"II":[153],"micromanagement":[154],"benchmark":[155],"show":[156],"significantly":[159],"outperforms":[160],"prevailing":[161],"state-of-the-art":[162],"baselines,":[163],"demonstrates":[165],"advantage":[167],"addressing":[171],"improving":[175],"efficiency.":[177]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
