{"id":"https://openalex.org/W4402353770","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650452","title":"Self-Attention Guided Advice Distillation in Multi-Agent Deep Reinforcement Learning","display_name":"Self-Attention Guided Advice Distillation in Multi-Agent Deep Reinforcement Learning","publication_year":2024,"publication_date":"2024-06-30","ids":{"openalex":"https://openalex.org/W4402353770","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650452"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn60899.2024.10650452","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn60899.2024.10650452","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115602293","display_name":"Yang Li","orcid":"https://orcid.org/0000-0003-4616-4230"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yang Li","raw_affiliation_strings":["Dalian University of Technology,College of Computer Science and Technology,Dalian,China,116024"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology,College of Computer Science and Technology,Dalian,China,116024","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061905512","display_name":"Sihan Zhou","orcid":"https://orcid.org/0000-0002-9415-7783"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sihan Zhou","raw_affiliation_strings":["Dalian University of Technology,College of Computer Science and Technology,Dalian,China,116024"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology,College of Computer Science and Technology,Dalian,China,116024","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044443092","display_name":"Yaqing Hou","orcid":"https://orcid.org/0000-0002-9929-2650"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaqing Hou","raw_affiliation_strings":["Dalian University of Technology,College of Computer Science and Technology,Dalian,China,116024"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology,College of Computer Science and Technology,Dalian,China,116024","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012728839","display_name":"Liran Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liran Zhou","raw_affiliation_strings":["Dalian University of Technology,College of Computer Science and Technology,Dalian,China,116024"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology,College of Computer Science and Technology,Dalian,China,116024","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091010251","display_name":"Hongwei Ge","orcid":"https://orcid.org/0000-0002-8937-1515"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongwei Ge","raw_affiliation_strings":["Dalian University of Technology,College of Computer Science and Technology,Dalian,China,116024"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology,College of Computer Science and Technology,Dalian,China,116024","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051395807","display_name":"Liang Feng","orcid":"https://orcid.org/0000-0002-8356-7242"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Feng","raw_affiliation_strings":["Chongqing University,College of Computer Science,Chongqing,China,400044"],"affiliations":[{"raw_affiliation_string":"Chongqing University,College of Computer Science,Chongqing,China,400044","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5115694943","display_name":"Siyu Wang","orcid":"https://orcid.org/0009-0001-2719-9748"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siyu Wang","raw_affiliation_strings":["Chongqing University,College of Computer Science,Chongqing,China,400044"],"affiliations":[{"raw_affiliation_string":"Chongqing University,College of Computer Science,Chongqing,China,400044","institution_ids":["https://openalex.org/I158842170"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5115602293"],"corresponding_institution_ids":["https://openalex.org/I27357992"],"apc_list":null,"apc_paid":null,"fwci":0.3862,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.66852098,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"32","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9764999747276306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9429000020027161,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7999414205551147},{"id":"https://openalex.org/keywords/advice","display_name":"Advice (programming)","score":0.7727611660957336},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.6955044865608215},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6603304743766785},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.49925780296325684},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39416226744651794},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3310711681842804},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.16895374655723572},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.08194282650947571},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.06369224190711975}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7999414205551147},{"id":"https://openalex.org/C2779955035","wikidata":"https://www.wikidata.org/wiki/Q4686785","display_name":"Advice (programming)","level":2,"score":0.7727611660957336},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.6955044865608215},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6603304743766785},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.49925780296325684},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39416226744651794},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3310711681842804},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.16895374655723572},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.08194282650947571},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.06369224190711975},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn60899.2024.10650452","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn60899.2024.10650452","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1821462560","https://openalex.org/W2121863487","https://openalex.org/W2145339207","https://openalex.org/W2539402368","https://openalex.org/W2575472443","https://openalex.org/W2620645529","https://openalex.org/W2911825241","https://openalex.org/W2913756371","https://openalex.org/W2945659171","https://openalex.org/W2963390684","https://openalex.org/W2963658727","https://openalex.org/W2963762747","https://openalex.org/W2964067469","https://openalex.org/W2976108375","https://openalex.org/W2996868001","https://openalex.org/W3011120880","https://openalex.org/W3094502228","https://openalex.org/W3108439758","https://openalex.org/W3114566572","https://openalex.org/W3200878391","https://openalex.org/W4211113537","https://openalex.org/W4246078117","https://openalex.org/W4288091739","https://openalex.org/W4383112908","https://openalex.org/W4385245566","https://openalex.org/W6631533588","https://openalex.org/W6730844258","https://openalex.org/W6747146101","https://openalex.org/W6762492177","https://openalex.org/W6766805167","https://openalex.org/W6767327128","https://openalex.org/W6775686901","https://openalex.org/W6780070463","https://openalex.org/W6784333009","https://openalex.org/W6788135285"],"related_works":["https://openalex.org/W4393601209","https://openalex.org/W3090906284","https://openalex.org/W253876680","https://openalex.org/W4393803066","https://openalex.org/W1987931999","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W4293797372","https://openalex.org/W4238052600"],"abstract_inverted_index":{"Advising":[0],"is":[1,181],"an":[2],"effective":[3],"method":[4,207],"to":[5,71,110,117,124,135,157,202],"enhance":[6,126],"agent":[7,27,47,99,156],"learning":[8,94,210],"performance":[9,211],"in":[10,53,58,191],"multi-agent":[11,146],"deep":[12],"reinforcement":[13],"learning.":[14],"Existing":[15],"advising":[16,204],"methods":[17],"typically":[18],"rely":[19],"on":[20],"a":[21,25,39,45,51,55,85,93,112,136,145,178,186],"teacher-student":[22],"framework":[23,149],"where":[24],"teacher":[26,46,90,163],"provides":[28],"student":[29,52,68,75,98,155],"agents":[30],"with":[31],"action":[32],"or":[33],"Q-value":[34],"advice.":[35,119],"However,":[36],"they":[37],"share":[38],"common":[40],"limitation:":[41],"the":[42,59,67,74,80,89,97,101,127,154,161,174,192,214],"advice":[43,81,147,159,167,180],"from":[44,88,160],"can":[48],"only":[49,122],"assist":[50],"making":[54],"one-time":[56,86],"decision":[57,77,114,171],"current":[60],"state":[61],"and":[62,115,164,188],"cannot":[63],"be":[64,108],"internalized":[65],"into":[66,168],"agent\u2019s":[69,76,128,193],"knowledge":[70],"intrinsically":[72],"change":[73],"model.":[78],"Consequently,":[79],"acts":[82],"more":[83],"like":[84],"instruction":[87],"rather":[91],"than":[92],"aid.":[95],"If":[96],"encounters":[100],"same":[102],"problem":[103],"again,":[104],"it":[105],"may":[106],"still":[107],"unable":[109],"make":[111],"sound":[113],"need":[116],"request":[118,158],"This":[120],"not":[121],"fails":[123],"rapidly":[125],"decision-making":[129,194],"ability":[130],"fundamentally":[131],"but":[132],"also":[133],"leads":[134],"considerable":[137],"waste":[138],"of":[139],"communication":[140,215],"costs.":[141],"Hence,":[142],"we":[143],"propose":[144],"distillation":[148],"through":[150],"attention":[151],"that":[152,166],"allows":[153],"experienced":[162],"distill":[165],"their":[169],"own":[170],"model":[172],"via":[173],"self-attention":[175],"mechanism.":[176],"As":[177],"result,":[179],"fully":[182],"utilized,":[183],"allowing":[184],"for":[185],"rapid":[187],"intrinsic":[189],"improvement":[190],"capabilities.":[195],"Our":[196],"empirical":[197],"evaluations":[198],"demonstrate":[199],"that,":[200],"compared":[201],"existing":[203],"methods,":[205],"our":[206],"significantly":[208],"improves":[209],"while":[212],"reducing":[213],"cost.":[216]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
