{"id":"https://openalex.org/W7120011159","doi":"https://doi.org/10.1007/s10458-025-09729-1","title":"Improving scalability of multi-agent deep reinforcement learning with suboptimal human knowledge","display_name":"Improving scalability of multi-agent deep reinforcement learning with suboptimal human knowledge","publication_year":2026,"publication_date":"2026-01-10","ids":{"openalex":"https://openalex.org/W7120011159","doi":"https://doi.org/10.1007/s10458-025-09729-1"},"language":"en","primary_location":{"id":"doi:10.1007/s10458-025-09729-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10458-025-09729-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10458-025-09729-1.pdf","source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10458-025-09729-1.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067813480","display_name":"Dingbang Liu","orcid":"https://orcid.org/0009-0009-4597-1103"},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]},{"id":"https://openalex.org/I204824540","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277","country_code":"AU","type":"education","lineage":["https://openalex.org/I204824540"]}],"countries":["AU","JP"],"is_corresponding":false,"raw_author_name":"Dingbang Liu","raw_affiliation_strings":["Department of Computer Science, Nagoya Institute of Technology, Nagoya, 466-8555, Aichi, Japan","School of Computing and Information Technology, University of Wollongong, Wollongong, 2500, New South Wales, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Nagoya Institute of Technology, Nagoya, 466-8555, Aichi, Japan","institution_ids":["https://openalex.org/I197274945"]},{"raw_affiliation_string":"School of Computing and Information Technology, University of Wollongong, Wollongong, 2500, New South Wales, Australia","institution_ids":["https://openalex.org/I204824540"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080092678","display_name":"Fenghui Ren","orcid":"https://orcid.org/0000-0001-6159-7873"},"institutions":[{"id":"https://openalex.org/I204824540","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277","country_code":"AU","type":"education","lineage":["https://openalex.org/I204824540"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Fenghui Ren","raw_affiliation_strings":["School of Computing and Information Technology, University of Wollongong, Wollongong, 2500, New South Wales, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing and Information Technology, University of Wollongong, Wollongong, 2500, New South Wales, Australia","institution_ids":["https://openalex.org/I204824540"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122512109","display_name":"Jun Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I204824540","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277","country_code":"AU","type":"education","lineage":["https://openalex.org/I204824540"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jun Yan","raw_affiliation_strings":["School of Computing and Information Technology, University of Wollongong, Wollongong, 2500, New South Wales, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing and Information Technology, University of Wollongong, Wollongong, 2500, New South Wales, Australia","institution_ids":["https://openalex.org/I204824540"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060245971","display_name":"Guoxin Su","orcid":"https://orcid.org/0000-0002-2087-4894"},"institutions":[{"id":"https://openalex.org/I204824540","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277","country_code":"AU","type":"education","lineage":["https://openalex.org/I204824540"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Guoxin Su","raw_affiliation_strings":["School of Computing and Information Technology, University of Wollongong, Wollongong, 2500, New South Wales, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing and Information Technology, University of Wollongong, Wollongong, 2500, New South Wales, Australia","institution_ids":["https://openalex.org/I204824540"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103088878","display_name":"Wen Gu","orcid":null},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Wen Gu","raw_affiliation_strings":["Department of Computer Science, Nagoya Institute of Technology, Nagoya, 466-8555, Aichi, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Nagoya Institute of Technology, Nagoya, 466-8555, Aichi, Japan","institution_ids":["https://openalex.org/I197274945"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5121789249","display_name":"Shohei Kato","orcid":null},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shohei Kato","raw_affiliation_strings":["Department of Computer Science, Nagoya Institute of Technology, Nagoya, 466-8555, Aichi, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Nagoya Institute of Technology, Nagoya, 466-8555, Aichi, Japan","institution_ids":["https://openalex.org/I197274945"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5080092678"],"corresponding_institution_ids":["https://openalex.org/I204824540"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06258425,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.32030001282691956,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.32030001282691956,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.08900000154972076,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.06949999928474426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8461999893188477},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6608999967575073},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.583899974822998},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.46239998936653137},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.43950000405311584},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.38119998574256897},{"id":"https://openalex.org/keywords/knowledge-space","display_name":"Knowledge space","score":0.37049999833106995}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8461999893188477},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6977999806404114},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6608999967575073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6563000082969666},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.583899974822998},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5504999756813049},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.46239998936653137},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.43950000405311584},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.38119998574256897},{"id":"https://openalex.org/C2777348039","wikidata":"https://www.wikidata.org/wiki/Q6423397","display_name":"Knowledge space","level":2,"score":0.37049999833106995},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.3215000033378601},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.3212999999523163},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C115925183","wikidata":"https://www.wikidata.org/wiki/Q1412694","display_name":"Knowledge-based systems","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2924000024795532},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.28760001063346863},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C84685590","wikidata":"https://www.wikidata.org/wiki/Q1540472","display_name":"Knowledge engineering","level":2,"score":0.26420000195503235}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10458-025-09729-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10458-025-09729-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10458-025-09729-1.pdf","source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10458-025-09729-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10458-025-09729-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10458-025-09729-1.pdf","source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320969","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7120011159.pdf","grobid_xml":"https://content.openalex.org/works/W7120011159.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W1580672106","https://openalex.org/W2062525454","https://openalex.org/W2571699337","https://openalex.org/W2604382266","https://openalex.org/W2604847142","https://openalex.org/W2617547828","https://openalex.org/W2747213132","https://openalex.org/W2768629321","https://openalex.org/W2908261578","https://openalex.org/W2915117209","https://openalex.org/W2921955147","https://openalex.org/W2963890729","https://openalex.org/W2965723696","https://openalex.org/W2987046690","https://openalex.org/W2991046523","https://openalex.org/W2997536466","https://openalex.org/W3002044607","https://openalex.org/W3016335401","https://openalex.org/W3034311880","https://openalex.org/W3089192431","https://openalex.org/W3096763169","https://openalex.org/W3106649810","https://openalex.org/W3156295478","https://openalex.org/W3170719173","https://openalex.org/W3183523618","https://openalex.org/W3198359393","https://openalex.org/W4283714159","https://openalex.org/W4285034152","https://openalex.org/W4297098388","https://openalex.org/W4297726143"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"Due":[1],"to":[2,49,54,112,123,138,143,153,167,176],"its":[3,125],"exceptional":[4],"learning":[5,10,20,45,76,149],"ability,":[6],"multi-agent":[7,85],"deep":[8],"reinforcement":[9],"(MADRL)":[11],"has":[12],"garnered":[13],"widespread":[14],"research":[15],"interest.":[16],"However,":[17],"since":[18],"the":[19,43,60,64,72,83,96,118,144,154,173,181,185,193,207,236],"is":[21,36,212],"data-driven":[22],"and":[23,39,161,230],"involves":[24],"sampling":[25],"from":[26,52,57],"millions":[27],"of":[28,34,74,99,120,135,172,184,238],"steps,":[29],"training":[30,97,208],"a":[31,217],"large":[32,218],"number":[33],"agents":[35,166],"inherently":[37],"challenging":[38,182],"inefficient.":[40],"Inspired":[41],"by":[42,77],"human":[44,80,89,93,141,178,232],"process,":[46,209],"we":[47,91,115,131,199],"aim":[48],"transfer":[50],"knowledge":[51,81,94,122,136,142,157,204,211],"humans":[53,160],"avoid":[55],"starting":[56],"scratch.":[58],"Given":[59],"growing":[61],"emphasis":[62],"on":[63,70,127],"Human-on-the-Loop":[65],"concept,":[66],"this":[67,223],"study":[68,224],"focuses":[69],"addressing":[71,216],"challenges":[73],"large-population":[75],"incorporating":[78],"suboptimal":[79],"into":[82,95,228],"cooperative":[84],"environment.":[86],"To":[87],"leverage":[88,177],"experience,":[90],"integrate":[92],"process":[98],"MADRL,":[100],"representing":[101],"it":[102],"in":[103,156,196],"natural":[104],"language":[105],"rather":[106],"than":[107],"specific":[108],"action-state":[109],"pairs.":[110],"Compared":[111],"previous":[113],"works,":[114],"further":[116],"consider":[117],"attributes":[119],"transferred":[121],"assess":[124],"impact":[126],"algorithm":[128],"scalability.":[129],"Additionally,":[130],"examine":[132],"several":[133],"features":[134],"mapping":[137,231],"effectively":[139],"convert":[140],"action":[145],"space":[146,175],"where":[147],"agent":[148,219,239],"occurs.":[150],"In":[151],"reaction":[152],"disparity":[155],"construction":[158],"between":[159],"agents,":[162],"our":[163,189],"approach":[164],"allows":[165],"decide":[168],"freely":[169],"which":[170],"portions":[171],"state":[174],"knowledge.":[179],"From":[180],"domains":[183],"StarCraft":[186],"Multi-agent":[187],"Challenge,":[188],"method":[190],"successfully":[191],"alleviates":[192],"scalability":[194],"issue":[195],"MADRL.":[197],"Furthermore,":[198],"find":[200],"that,":[201],"despite":[202],"individual-type":[203],"significantly":[205],"accelerating":[206],"cooperative-type":[210],"more":[213],"desirable":[214],"for":[215],"population.":[220],"We":[221],"hope":[222],"provides":[225],"valuable":[226],"insights":[227],"applying":[229],"knowledge,":[233],"ultimately":[234],"enhancing":[235],"interpretability":[237],"behavior.":[240]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-01-10T00:00:00"}
