{"id":"https://openalex.org/W2944481713","doi":"https://doi.org/10.1109/cig.2019.8847988","title":"Teaching on a Budget in Multi-Agent Deep Reinforcement Learning","display_name":"Teaching on a Budget in Multi-Agent Deep Reinforcement Learning","publication_year":2019,"publication_date":"2019-08-01","ids":{"openalex":"https://openalex.org/W2944481713","doi":"https://doi.org/10.1109/cig.2019.8847988","mag":"2944481713"},"language":"en","primary_location":{"id":"doi:10.1109/cig.2019.8847988","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cig.2019.8847988","pdf_url":null,"source":{"id":"https://openalex.org/S4306498491","display_name":"2019 IEEE Conference on Games (CoG)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Conference on Games (CoG)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1905.01357","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101585566","display_name":"Erc\u00fcment \u0130lhan","orcid":"https://orcid.org/0000-0003-0400-0043"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Ercument Ilhan","raw_affiliation_strings":["School of Electronic Engineering and Computer Science, Queen Mary University of London, London, United Kingdom","School of Electronic Engineering and Computer Science Queen Mary University of London London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London, United Kingdom","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"School of Electronic Engineering and Computer Science Queen Mary University of London London, United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008957116","display_name":"Jeremy Gow","orcid":"https://orcid.org/0009-0004-2768-6898"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jeremy Gow","raw_affiliation_strings":["School of Electronic Engineering and Computer Science, Queen Mary University of London, London, United Kingdom","School of Electronic Engineering and Computer Science Queen Mary University of London London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London, United Kingdom","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"School of Electronic Engineering and Computer Science Queen Mary University of London London, United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058274276","display_name":"Diego P\u00e9rez-Li\u00e9bana","orcid":"https://orcid.org/0000-0003-1958-0212"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Diego Perez-Liebana","raw_affiliation_strings":["School of Electronic Engineering and Computer Science, Queen Mary University of London, London, United Kingdom","School of Electronic Engineering and Computer Science Queen Mary University of London London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London, United Kingdom","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"School of Electronic Engineering and Computer Science Queen Mary University of London London, United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101585566"],"corresponding_institution_ids":["https://openalex.org/I166337079"],"apc_list":null,"apc_paid":null,"fwci":0.2666,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.59268622,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8573752045631409},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7531203031539917},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.667424201965332},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.639017641544342},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6172587275505066},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5480336546897888},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4329402446746826},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4276142716407776},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.404318630695343},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10667073726654053}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8573752045631409},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7531203031539917},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.667424201965332},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.639017641544342},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6172587275505066},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5480336546897888},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4329402446746826},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4276142716407776},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.404318630695343},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10667073726654053},{"id":"https://openalex.org/C548081761","wikidata":"https://www.wikidata.org/wiki/Q180388","display_name":"Waste management","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/cig.2019.8847988","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cig.2019.8847988","pdf_url":null,"source":{"id":"https://openalex.org/S4306498491","display_name":"2019 IEEE Conference on Games (CoG)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Conference on Games (CoG)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1905.01357","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.01357","pdf_url":"https://arxiv.org/pdf/1905.01357","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2944481713","is_oa":true,"landing_page_url":"http://arxiv.org/pdf/1905.01357.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1905.01357","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1905.01357","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1905.01357","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.01357","pdf_url":"https://arxiv.org/pdf/1905.01357","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W2944481713.pdf"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1529399279","https://openalex.org/W1589064538","https://openalex.org/W1757796397","https://openalex.org/W2097381042","https://openalex.org/W2098441518","https://openalex.org/W2121863487","https://openalex.org/W2145339207","https://openalex.org/W2257979135","https://openalex.org/W2341479450","https://openalex.org/W2563829177","https://openalex.org/W2620645529","https://openalex.org/W2735506162","https://openalex.org/W2739573821","https://openalex.org/W2747213132","https://openalex.org/W2921489897","https://openalex.org/W2921955147","https://openalex.org/W2963000099","https://openalex.org/W2963390684","https://openalex.org/W2963658727","https://openalex.org/W2964067469","https://openalex.org/W2964291307","https://openalex.org/W2991093017","https://openalex.org/W3099324303","https://openalex.org/W6631533588","https://openalex.org/W6637967152","https://openalex.org/W6674600207","https://openalex.org/W6674812091","https://openalex.org/W6685388067","https://openalex.org/W6704460705","https://openalex.org/W6712181171","https://openalex.org/W6730844258","https://openalex.org/W6741054924","https://openalex.org/W6744838376","https://openalex.org/W6752059867","https://openalex.org/W6756303580","https://openalex.org/W6760256100"],"related_works":["https://openalex.org/W3165550498","https://openalex.org/W3161195296","https://openalex.org/W2994642062","https://openalex.org/W2397927371","https://openalex.org/W2973029245","https://openalex.org/W2009988656","https://openalex.org/W3106394430","https://openalex.org/W3002608245","https://openalex.org/W3008141518","https://openalex.org/W3121258823","https://openalex.org/W1594545887","https://openalex.org/W3136817238","https://openalex.org/W3200561352","https://openalex.org/W3185202350","https://openalex.org/W3100851130","https://openalex.org/W3112977404","https://openalex.org/W3119115629","https://openalex.org/W3098974658","https://openalex.org/W3158250864","https://openalex.org/W3093342898"],"abstract_inverted_index":{"Deep":[0],"Reinforcement":[1,33],"Learning":[2,34],"(RL)":[3],"algorithms":[4],"can":[5,24,84],"solve":[6],"complex":[7],"sequential":[8],"decision":[9],"tasks":[10],"successfully.":[11],"However,":[12,95],"they":[13],"have":[14],"a":[15,45,69,120,135,163],"major":[16],"drawback":[17,37],"of":[18,48,100],"having":[19],"poor":[20],"sample":[21],"efficiency":[22],"which":[23],"often":[25],"be":[26,86,148,173,178],"tackled":[27],"by":[28],"knowledge":[29,52,142],"reuse.":[30],"In":[31,106],"Multi-Agent":[32],"(MARL)":[35],"this":[36,98,107],"becomes":[38],"worse,":[39],"but":[40],"at":[41],"the":[42,152],"same":[43],"time,":[44],"new":[46],"set":[47],"opportunities":[49],"to":[50,88,139,150,177],"leverage":[51],"are":[53,102],"also":[54,85],"presented":[55],"through":[56,68],"agent":[57],"interactions.":[58],"One":[59],"promising":[60,92],"approach":[61,170],"among":[62],"these":[63],"is":[64],"peer-to-peer":[65],"action":[66,112],"advising":[67,113],"teacher-student":[70,153],"framework.":[71],"Despite":[72],"being":[73],"introduced":[74],"for":[75,137],"single-agent":[76],"RL":[77],"originally,":[78],"recent":[79],"studies":[80,96],"show":[81,166],"that":[82,167],"it":[83],"applied":[87],"multi-agent":[89],"scenarios":[90],"with":[91,155],"empirical":[93],"results.":[94],"in":[97,115,143,162],"line":[99],"research":[101],"currently":[103],"very":[104],"limited.":[105],"paper,":[108],"we":[109,133],"propose":[110],"heuristics-based":[111],"techniques":[114],"cooperative":[116],"decentralised":[117],"MARL,":[118],"using":[119],"nonlinear":[121],"function":[122],"approximation":[123],"based":[124],"task-level":[125],"policy.":[126],"By":[127],"adopting":[128],"Random":[129],"Network":[130],"Distillation":[131],"technique,":[132],"devise":[134],"measurement":[136],"agents":[138],"assess":[140],"their":[141],"any":[144],"given":[145],"state":[146],"and":[147,175],"able":[149],"initiate":[151],"dynamics":[154],"no":[156],"prior":[157],"role":[158],"assumptions.":[159],"Experimental":[160],"results":[161],"gridworld":[164],"environment":[165],"such":[168],"an":[169],"may":[171],"indeed":[172],"useful":[174],"needs":[176],"further":[179],"investigated.":[180]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2}],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
