{"id":"https://openalex.org/W4317350110","doi":"https://doi.org/10.1109/vtc2022-fall57202.2022.10012835","title":"Sample-Efficient Multi-Agent Reinforcement Learning with Demonstrations for Flocking Control","display_name":"Sample-Efficient Multi-Agent Reinforcement Learning with Demonstrations for Flocking Control","publication_year":2022,"publication_date":"2022-09-01","ids":{"openalex":"https://openalex.org/W4317350110","doi":"https://doi.org/10.1109/vtc2022-fall57202.2022.10012835"},"language":"en","primary_location":{"id":"doi:10.1109/vtc2022-fall57202.2022.10012835","is_oa":false,"landing_page_url":"https://doi.org/10.1109/vtc2022-fall57202.2022.10012835","pdf_url":null,"source":{"id":"https://openalex.org/S4363607792","display_name":"2022 IEEE 96th Vehicular Technology Conference (VTC2022-Fall)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 96th Vehicular Technology Conference (VTC2022-Fall)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089892883","display_name":"Yunbo Qiu","orcid":"https://orcid.org/0000-0001-5069-6673"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yunbo Qiu","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056341761","display_name":"Yuzhu Zhan","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuzhu Zhan","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067942871","display_name":"Yue Jin","orcid":"https://orcid.org/0000-0002-0344-4868"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Jin","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020075443","display_name":"Jian Wang","orcid":"https://orcid.org/0000-0002-4840-9716"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Wang","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100441855","display_name":"Xudong Zhang","orcid":"https://orcid.org/0000-0002-6465-7437"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xudong Zhang","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5089892883"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.8639,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.70189099,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/marl","display_name":"Marl","score":0.8628724813461304},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.7053468227386475},{"id":"https://openalex.org/keywords/flocking","display_name":"Flocking (texture)","score":0.7027982473373413},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.670121431350708},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5836064219474792},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4912497401237488},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.44177180528640747},{"id":"https://openalex.org/keywords/control-engineering","display_name":"Control engineering","score":0.3204282820224762},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.26139867305755615}],"concepts":[{"id":"https://openalex.org/C92927620","wikidata":"https://www.wikidata.org/wiki/Q184053","display_name":"Marl","level":3,"score":0.8628724813461304},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.7053468227386475},{"id":"https://openalex.org/C2781220375","wikidata":"https://www.wikidata.org/wiki/Q814208","display_name":"Flocking (texture)","level":2,"score":0.7027982473373413},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.670121431350708},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5836064219474792},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4912497401237488},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.44177180528640747},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.3204282820224762},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.26139867305755615},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C109007969","wikidata":"https://www.wikidata.org/wiki/Q749565","display_name":"Structural basin","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/vtc2022-fall57202.2022.10012835","is_oa":false,"landing_page_url":"https://doi.org/10.1109/vtc2022-fall57202.2022.10012835","pdf_url":null,"source":{"id":"https://openalex.org/S4363607792","display_name":"2022 IEEE 96th Vehicular Technology Conference (VTC2022-Fall)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 96th Vehicular Technology Conference (VTC2022-Fall)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5600000023841858,"display_name":"Life below water","id":"https://metadata.un.org/sdg/14"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1548913039","https://openalex.org/W1998030085","https://openalex.org/W2098774185","https://openalex.org/W2124394479","https://openalex.org/W2150312211","https://openalex.org/W2153809277","https://openalex.org/W2539402368","https://openalex.org/W2588187389","https://openalex.org/W2806824292","https://openalex.org/W2911495555","https://openalex.org/W2912083425","https://openalex.org/W2914304175","https://openalex.org/W2918849077","https://openalex.org/W2954085204","https://openalex.org/W2963099939","https://openalex.org/W2982316857","https://openalex.org/W3035458768","https://openalex.org/W3048703323","https://openalex.org/W3056510116","https://openalex.org/W3113915485","https://openalex.org/W3172360140","https://openalex.org/W4214717370","https://openalex.org/W6632828981","https://openalex.org/W6637569971","https://openalex.org/W6674884181","https://openalex.org/W6684338915","https://openalex.org/W6718092244","https://openalex.org/W6735698609","https://openalex.org/W6738796088","https://openalex.org/W6753207554","https://openalex.org/W6768977950","https://openalex.org/W6796589144","https://openalex.org/W6981852861"],"related_works":["https://openalex.org/W2126019709","https://openalex.org/W1702901972","https://openalex.org/W4249798507","https://openalex.org/W2069775250","https://openalex.org/W2093541819","https://openalex.org/W2032294417","https://openalex.org/W2152754392","https://openalex.org/W1989172970","https://openalex.org/W2196316523","https://openalex.org/W1899363654"],"abstract_inverted_index":{"Flocking":[0],"control":[1,42],"is":[2],"a":[3,57,74,135],"significant":[4],"problem":[5,39,150],"in":[6,89,128,148],"multi-agent":[7,11,16,33],"systems":[8],"such":[9],"as":[10],"unmanned":[12],"aerial":[13],"vehicles":[14],"and":[15,24,69,109,113,145],"autonomous":[17],"underwater":[18],"vehicles,":[19],"which":[20,83],"enhances":[21],"the":[22,38,70,98,129,149],"cooperativity":[23],"safety":[25],"of":[26,40,60,100,131,151],"agents.":[27,96],"In":[28],"contrast":[29],"to":[30,62,94],"traditional":[31,92],"methods,":[32],"reinforcement":[34],"learning":[35],"(MARL)":[36],"solves":[37],"flocking":[41,152],"more":[43],"flexibly.":[44],"However,":[45],"methods":[46,93],"based":[47],"on":[48],"MARL":[49,81,108,133],"suffer":[50],"from":[51,65,105,116],"sample":[52,126,143],"inefficiency,":[53],"since":[54],"they":[55],"require":[56],"huge":[58],"number":[59],"experiences":[61],"be":[63],"collected":[64,88],"interactions":[66],"between":[67],"agents":[68,102],"environment.":[71],"We":[72],"propose":[73],"novel":[75],"method":[76],"Pretraining":[77],"with":[78,91,121,134,155],"Demonstrations":[79],"for":[80],"(PwD-MARL),":[82],"can":[84],"utilize":[85],"non-expert":[86,122],"demonstrations":[87,106],"advance":[90],"pretrain":[95],"During":[97],"process":[99,130],"pretraining,":[101],"learn":[103],"policies":[104],"by":[107],"behavior":[110],"cloning":[111],"simultaneously,":[112],"are":[114],"prevented":[115],"overfitting":[117],"demonstrations.":[118,159],"By":[119],"pretraining":[120],"demonstrations,":[123],"PwD-MARL":[124,141],"improves":[125,142],"efficiency":[127,144],"online":[132],"warm":[136],"start.":[137],"Experiments":[138],"show":[139],"that":[140],"policy":[146],"performance":[147],"control,":[153],"even":[154],"bad":[156],"or":[157],"few":[158]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
