{"id":"https://openalex.org/W2804726947","doi":"https://doi.org/10.1609/aaai.v33i01.33016128","title":"Learning to Teach in Cooperative Multiagent Reinforcement Learning","display_name":"Learning to Teach in Cooperative Multiagent Reinforcement Learning","publication_year":2019,"publication_date":"2019-07-17","ids":{"openalex":"https://openalex.org/W2804726947","doi":"https://doi.org/10.1609/aaai.v33i01.33016128","mag":"2804726947"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v33i01.33016128","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33016128","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4570/4448","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4570/4448","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052169592","display_name":"Shayegan Omidshafiei","orcid":"https://orcid.org/0000-0001-7758-1454"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shayegan Omidshafiei","raw_affiliation_strings":["Massachusetts Institute of Technology","Massachusetts Institute Of Technology#TAB#"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"Massachusetts Institute Of Technology#TAB#","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033449275","display_name":"Dong Ki Kim","orcid":"https://orcid.org/0000-0002-5195-7852"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong-Ki Kim","raw_affiliation_strings":["Massachusetts Institute of Technology","Massachusetts Institute Of Technology#TAB#"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"Massachusetts Institute Of Technology#TAB#","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100348904","display_name":"Miao Liu","orcid":"https://orcid.org/0000-0002-6650-9972"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miao Liu","raw_affiliation_strings":["IBM"],"affiliations":[{"raw_affiliation_string":"IBM","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113551673","display_name":"Gerald Tesauro","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gerald Tesauro","raw_affiliation_strings":["IBM Research","IBM research"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]},{"raw_affiliation_string":"IBM research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037927240","display_name":"Matthew Riemer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matthew Riemer","raw_affiliation_strings":["IBM Research","IBM research"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]},{"raw_affiliation_string":"IBM research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033129735","display_name":"Christopher Amato","orcid":"https://orcid.org/0000-0002-6786-7384"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Christopher Amato","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045892503","display_name":"Murray Campbell","orcid":"https://orcid.org/0000-0001-8158-894X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Murray Campbell","raw_affiliation_strings":["IBM Research","IBM research"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]},{"raw_affiliation_string":"IBM research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011665886","display_name":"Jonathan P. How","orcid":"https://orcid.org/0000-0001-8576-1930"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonathan P. How","raw_affiliation_strings":["Massachusetts Institute of Technology","Massachusetts Institute Of Technology#TAB#"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"Massachusetts Institute Of Technology#TAB#","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5052169592"],"corresponding_institution_ids":["https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":1.8649,"has_fulltext":true,"cited_by_count":23,"citation_normalized_percentile":{"value":0.88630229,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":99},"biblio":{"volume":"33","issue":"01","first_page":"6128","last_page":"6136"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7044459581375122},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7028370499610901},{"id":"https://openalex.org/keywords/cooperative-learning","display_name":"Cooperative learning","score":0.6051803231239319},{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.5099881887435913},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.48374199867248535},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4270590543746948},{"id":"https://openalex.org/keywords/collaborative-learning","display_name":"Collaborative learning","score":0.420504093170166},{"id":"https://openalex.org/keywords/teaching-method","display_name":"Teaching method","score":0.3177635967731476},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.31355810165405273},{"id":"https://openalex.org/keywords/mathematics-education","display_name":"Mathematics education","score":0.26828399300575256},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.11993035674095154}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7044459581375122},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7028370499610901},{"id":"https://openalex.org/C51672120","wikidata":"https://www.wikidata.org/wiki/Q303446","display_name":"Cooperative learning","level":3,"score":0.6051803231239319},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.5099881887435913},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.48374199867248535},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4270590543746948},{"id":"https://openalex.org/C138020889","wikidata":"https://www.wikidata.org/wiki/Q2349659","display_name":"Collaborative learning","level":2,"score":0.420504093170166},{"id":"https://openalex.org/C88610354","wikidata":"https://www.wikidata.org/wiki/Q1813494","display_name":"Teaching method","level":2,"score":0.3177635967731476},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.31355810165405273},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.26828399300575256},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.11993035674095154},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1609/aaai.v33i01.33016128","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33016128","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4570/4448","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1805.07830","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1805.07830","pdf_url":"https://arxiv.org/pdf/1805.07830","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2804726947","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/1805.07830","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1805.07830","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1805.07830","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v33i01.33016128","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33016128","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4570/4448","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5899999737739563,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G2891890304","display_name":null,"funder_award_id":"Fellowship","funder_id":"https://openalex.org/F4320322357","funder_display_name":"Kwanjeong Educational Foundation"}],"funders":[{"id":"https://openalex.org/F4320310598","display_name":"Amazon Web Services","ror":"https://ror.org/04mv4n011"},{"id":"https://openalex.org/F4320322357","display_name":"Kwanjeong Educational Foundation","ror":"https://ror.org/05c5x8342"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2804726947.pdf","grobid_xml":"https://content.openalex.org/works/W2804726947.grobid-xml"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W1529399279","https://openalex.org/W1589064538","https://openalex.org/W1969685488","https://openalex.org/W1986014385","https://openalex.org/W1990513740","https://openalex.org/W1999874108","https://openalex.org/W2061562262","https://openalex.org/W2121696237","https://openalex.org/W2145339207","https://openalex.org/W2155027007","https://openalex.org/W2296073425","https://openalex.org/W2395575420","https://openalex.org/W2410842990","https://openalex.org/W2547875792","https://openalex.org/W2563829177","https://openalex.org/W2601465345","https://openalex.org/W2620645529","https://openalex.org/W2623431351","https://openalex.org/W2735506162","https://openalex.org/W2740275380","https://openalex.org/W2742485676","https://openalex.org/W2785494456","https://openalex.org/W2786471719","https://openalex.org/W2807884652","https://openalex.org/W2963000099","https://openalex.org/W2963289505","https://openalex.org/W2963696295","https://openalex.org/W2964121744","https://openalex.org/W2964327384","https://openalex.org/W2964338167","https://openalex.org/W3099324303","https://openalex.org/W6646884813","https://openalex.org/W6647975832","https://openalex.org/W6696904280","https://openalex.org/W6730844258","https://openalex.org/W6735698609","https://openalex.org/W6736021936","https://openalex.org/W6741776117","https://openalex.org/W6744562401","https://openalex.org/W6748208641"],"related_works":["https://openalex.org/W2963390684","https://openalex.org/W2921489897","https://openalex.org/W2013413939","https://openalex.org/W2945579124","https://openalex.org/W3037724942","https://openalex.org/W2575472443","https://openalex.org/W3005521394","https://openalex.org/W2274872746","https://openalex.org/W1500448197","https://openalex.org/W2995453501","https://openalex.org/W2786471719","https://openalex.org/W3120127704","https://openalex.org/W3209811075","https://openalex.org/W2404786126","https://openalex.org/W3147364449","https://openalex.org/W2946669606","https://openalex.org/W3107227009","https://openalex.org/W1589305404","https://openalex.org/W3012544020","https://openalex.org/W2243766736"],"abstract_inverted_index":{"Collective":[0],"human":[1,21],"knowledge":[2,36],"has":[3,48,83],"clearly":[4],"benefited":[5],"from":[6,32],"the":[7,95,108,156,174,181],"fact":[8],"that":[9,59,93,203],"innovations":[10],"by":[11,51],"individuals":[12],"are":[13,166],"taught":[14],"to":[15,20,34,44,64,80,87,102,117,119,128,152,159,172,190,215],"others":[16],"through":[17],"communication.":[18],"Similar":[19],"social":[22],"groups,":[23],"agents":[24,116,170,206],"in":[25,121,137,144,188,217],"distributed":[26],"learning":[27,47,79],"systems":[28],"would":[29],"likely":[30],"benefit":[31],"communication":[33],"share":[35],"and":[37,112,130,150,185,194],"teach":[38,81,120],"skills.":[39],"The":[40],"problem":[41,82],"of":[42,62,91,176],"teaching":[43,63,92,136,200,205],"improve":[45,160,191],"agent":[46,143],"been":[49],"investigated":[50],"prior":[52],"works,":[53,104],"but":[54,212],"these":[55,164,169],"approaches":[56],"make":[57],"assumptions":[58],"prevent":[60],"application":[61],"general":[65,110],"multiagent":[66,97,123,139],"problems,":[67],"or":[68],"require":[69],"domain":[70],"expertise":[71],"for":[72,114],"problems":[73],"they":[74],"can":[75],"apply":[76],"to.":[77],"This":[78],"inherent":[84],"complexities":[85],"related":[86],"measuring":[88],"long-term":[89],"impacts":[90],"compound":[94],"standard":[96],"coordination":[98],"challenges.":[99],"In":[100],"contrast":[101],"existing":[103,220],"this":[105],"paper":[106],"presents":[107],"first":[109],"framework":[111],"algorithm":[113],"intelligent":[115],"learn":[118,171,209,214],"a":[122],"environment.":[124],"Our":[125],"algorithm,":[126],"Learning":[127],"Coordinate":[129],"Teach":[131],"Reinforcement":[132],"(LeCTR),":[133],"addresses":[134],"peer-to-peer":[135],"cooperative":[138],"reinforcement":[140],"learning.":[141,162,195],"Each":[142],"our":[145,204],"approach":[146],"learns":[147],"both":[148],"when":[149],"what":[151],"advise,":[153],"then":[154],"uses":[155],"received":[157],"advice":[158,187],"local":[161],"Importantly,":[163],"roles":[165],"not":[167,207],"fixed;":[168],"assume":[173],"role":[175],"student":[177],"and/or":[178],"teacher":[179],"at":[180],"appropriate":[182],"moments,":[183],"requesting":[184],"providing":[186],"order":[189],"teamwide":[192],"performance":[193],"Empirical":[196],"comparisons":[197],"against":[198],"state-of-the-art":[199],"methods":[201,221],"show":[202],"only":[208],"significantly":[210],"faster,":[211],"also":[213],"coordinate":[216],"tasks":[218],"where":[219],"fail.":[222]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":2}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
