{"id":"https://openalex.org/W4386869541","doi":"https://doi.org/10.1109/tg.2023.3316697","title":"Deep Multitask Multiagent Reinforcement Learning With Knowledge Transfer","display_name":"Deep Multitask Multiagent Reinforcement Learning With Knowledge Transfer","publication_year":2023,"publication_date":"2023-09-19","ids":{"openalex":"https://openalex.org/W4386869541","doi":"https://doi.org/10.1109/tg.2023.3316697"},"language":"en","primary_location":{"id":"doi:10.1109/tg.2023.3316697","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tg.2023.3316697","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068063235","display_name":"Yuxiang Mai","orcid":"https://orcid.org/0000-0003-3478-0335"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuxiang Mai","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China","Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049451354","display_name":"Yifan Zang","orcid":"https://orcid.org/0000-0003-4537-384X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifan Zang","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China","Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083652259","display_name":"Qiyue Yin","orcid":"https://orcid.org/0000-0002-3442-6275"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiyue Yin","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China","Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111924466","display_name":"Wancheng Ni","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wancheng Ni","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China","Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028693655","display_name":"Kaiqi Huang","orcid":"https://orcid.org/0000-0002-2677-9273"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210097554","display_name":"Center for Excellence in Brain Science and Intelligence Technology","ror":"https://ror.org/00vpwhm04","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210097554"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiqi Huang","raw_affiliation_strings":["Center for Research on Intelligent System and Engineering and the National Laboratory of Pattern Recognition, Institute of Automaiton, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China","CAS Center for Excellence in Brain Science and Intelligence Technology, China"],"affiliations":[{"raw_affiliation_string":"Center for Research on Intelligent System and Engineering and the National Laboratory of Pattern Recognition, Institute of Automaiton, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"CAS Center for Excellence in Brain Science and Intelligence Technology, China","institution_ids":["https://openalex.org/I4210097554"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5068063235"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":2.0876,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.89719204,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"16","issue":"3","first_page":"566","last_page":"576"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.925000011920929,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7368078827857971},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6813220977783203},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.6294692754745483},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5901772975921631},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5172289609909058},{"id":"https://openalex.org/keywords/knowledge-transfer","display_name":"Knowledge transfer","score":0.43275201320648193},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4187217652797699},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.32971182465553284},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.2939910888671875},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14618518948554993},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.08035612106323242}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7368078827857971},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6813220977783203},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.6294692754745483},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5901772975921631},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5172289609909058},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.43275201320648193},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4187217652797699},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.32971182465553284},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.2939910888671875},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14618518948554993},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.08035612106323242}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tg.2023.3316697","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tg.2023.3316697","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W3196817267","https://openalex.org/W1976600725"],"abstract_inverted_index":{"Despite":[0],"the":[1,75,90,105,116,148,161,164,169],"potential":[2],"of":[3,17,57,92,155,163],"Multi-Agent":[4],"Reinforcement":[5],"Learning":[6],"(MARL)":[7],"in":[8,42,63],"addressing":[9],"numerous":[10],"complex":[11],"tasks,":[12],"training":[13,99],"a":[14,27,34,54,68,81,152],"single":[15,55],"team":[16,24,56,82,154],"MARL":[18,44],"agents":[19,58],"to":[20,59],"handle":[21],"multiple":[22,64],"diverse":[23],"tasks":[25],"remains":[26],"challenge.":[28],"In":[29],"this":[30,98],"paper,":[31],"we":[32,96],"introduce":[33],"novel":[35],"Multi-task":[36],"method":[37],"based":[38],"on":[39],"Knowledge":[40],"Transfer":[41],"cooperative":[43],"(MKT-MARL).":[45],"By":[46],"learning":[47,107],"from":[48,89,108],"task-specific":[49,93],"teachers,":[50],"our":[51,126,144],"approach":[52],"empowers":[53],"attain":[60],"expert-level":[61],"performance":[62],"tasks.":[65],"MKT-MARL":[66],"utilizes":[67],"knowledge":[69,88,166],"distillation":[70,109],"algorithm":[71,127,145],"specifically":[72],"designed":[73],"for":[74],"multi-agent":[76,136],"architecture,":[77],"which":[78],"rapidly":[79],"learns":[80],"control":[83],"policy":[84],"incorporating":[85],"common":[86],"coordinated":[87],"experience":[91],"teachers.":[94,122],"Additionally,":[95],"enhance":[97],"with":[100],"teacher":[101,170],"annealing,":[102],"gradually":[103],"shifting":[104],"model's":[106],"towards":[110],"environmental":[111],"rewards.":[112],"This":[113],"enhancement":[114],"helps":[115],"multi-task":[117],"model":[118],"surpass":[119],"its":[120],"single-task":[121,149],"We":[123],"extensively":[124],"evaluate":[125],"using":[128],"two":[129],"commonly-used":[130],"benchmarks:":[131],"StarCraft":[132],"II":[133],"micro-management":[134],"and":[135,151,168],"particle":[137],"environment.":[138],"The":[139],"experimental":[140],"results":[141],"demonstrate":[142],"that":[143],"outperforms":[146],"both":[147],"teachers":[150],"jointly-trained":[153],"agents.":[156],"Extensive":[157],"ablation":[158],"experiments":[159],"illustrate":[160],"effectiveness":[162],"supervised":[165],"transfer":[167],"annealing":[171],"strategy.":[172]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
