{"id":"https://openalex.org/W4323072986","doi":"https://doi.org/10.48550/arxiv.2303.01170","title":"Expert-Free Online Transfer Learning in Multi-Agent Reinforcement Learning","display_name":"Expert-Free Online Transfer Learning in Multi-Agent Reinforcement Learning","publication_year":2023,"publication_date":"2023-03-02","ids":{"openalex":"https://openalex.org/W4323072986","doi":"https://doi.org/10.48550/arxiv.2303.01170"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2303.01170","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.01170","pdf_url":"https://arxiv.org/pdf/2303.01170","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2303.01170","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5105633952","display_name":"Alberto Castagna","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Castagna, Alberto","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5059738292","display_name":"Ivana Duspari\u0107","orcid":"https://orcid.org/0000-0003-0621-5400"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dusparic, Ivana","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5105633952"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9718999862670898,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9265000224113464,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8277531266212463},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8247609734535217},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7307034730911255},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.6828672885894775},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6662915349006653},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.657893717288971},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5828956961631775},{"id":"https://openalex.org/keywords/knowledge-transfer","display_name":"Knowledge transfer","score":0.5584173202514648},{"id":"https://openalex.org/keywords/negative-transfer","display_name":"Negative transfer","score":0.4448893070220947},{"id":"https://openalex.org/keywords/transfer","display_name":"Transfer (computing)","score":0.4209938049316406},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.1365211009979248},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08113542199134827}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8277531266212463},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8247609734535217},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7307034730911255},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.6828672885894775},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6662915349006653},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.657893717288971},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5828956961631775},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.5584173202514648},{"id":"https://openalex.org/C2779178101","wikidata":"https://www.wikidata.org/wiki/Q6987274","display_name":"Negative transfer","level":3,"score":0.4448893070220947},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.4209938049316406},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.1365211009979248},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08113542199134827},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C171041071","wikidata":"https://www.wikidata.org/wiki/Q36870","display_name":"First language","level":2,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2303.01170","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.01170","pdf_url":"https://arxiv.org/pdf/2303.01170","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:tara.tcd.ie:2262/108443","is_oa":true,"landing_page_url":"http://hdl.handle.net/2262/108443","pdf_url":"https://www.tara.tcd.ie/bitstream/2262/108443/1/thesis%20-%20signedAC.pdf","source":{"id":"https://openalex.org/S4306400387","display_name":"Trinity's Access to Research Output (TARA) (Trinity College Dublin)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205274468","host_organization_name":"Trinity College Dublin","host_organization_lineage":["https://openalex.org/I205274468"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Thesis"},{"id":"doi:10.48550/arxiv.2303.01170","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2303.01170","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2303.01170","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.01170","pdf_url":"https://arxiv.org/pdf/2303.01170","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2394637485","display_name":null,"funder_award_id":"18/CRT/6223","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"}],"funders":[{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4323072986.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W2392266266","https://openalex.org/W2373702962","https://openalex.org/W2398668521","https://openalex.org/W2619137770","https://openalex.org/W2548360214","https://openalex.org/W2952841984","https://openalex.org/W2076338401","https://openalex.org/W2079749503"],"abstract_inverted_index":{"Transfer":[0,79],"learning":[1,90],"in":[2,72,91,162],"Reinforcement":[3],"Learning":[4,80],"(RL)":[5],"has":[6],"been":[7],"widely":[8],"studied":[9],"to":[10,28,41,66,104,207],"overcome":[11],"training":[12,30],"issues":[13],"of":[14,55,137,210],"Deep-RL,":[15],"i.e.,":[16],"exploration":[17],"cost,":[18],"data":[19],"availability":[20],"and":[21,98,102,118,154,158,172],"convergence":[22],"time,":[23],"by":[24],"introducing":[25],"a":[26,49,52,151,167],"way":[27],"enhance":[29],"phase":[31],"with":[32,157,203],"external":[33,195],"knowledge.":[34],"Generally,":[35],"knowledge":[36,103],"is":[37,61],"transferred":[38,106],"from":[39,142],"expert-agents":[40],"novices.":[42],"While":[43],"this":[44,73],"fixes":[45],"the":[46,56,208,211],"issue":[47],"for":[48,63],"novice":[50],"agent,":[51],"good":[53],"understanding":[54],"task":[57,212],"on":[58,115],"expert":[59,96,160],"agent":[60,101],"required":[62],"such":[64],"transfer":[65,89,99,112],"be":[67,105],"effective.":[68],"As":[69],"an":[70,82,135,204],"alternative,":[71],"paper":[74],"we":[75,124],"propose":[76,126],"Expert-Free":[77],"Online":[78],"(EF-OnTL),":[81],"algorithm":[83],"that":[84,139,180],"enables":[85],"expert-free":[86],"real-time":[87],"dynamic":[88],"multi-agent":[92],"system.":[93],"No":[94],"dedicated":[95],"exists,":[97],"source":[100],"are":[107],"dynamically":[108],"selected":[109],"at":[110],"each":[111],"step":[113],"based":[114],"agents'":[116],"performance":[117,185],"uncertainty.":[119],"To":[120],"improve":[121],"uncertainty":[122,141],"estimation,":[123],"also":[125],"State":[127],"Action":[128],"Reward":[129],"Next-State":[130],"Random":[131],"Network":[132],"Distillation":[133],"(sars-RND),":[134],"extension":[136],"RND":[138],"estimates":[140],"RL":[143],"agent-environment":[144],"interaction.":[145],"We":[146],"demonstrate":[147],"EF-OnTL":[148,181,200],"effectiveness":[149],"against":[150,188],"no-transfer":[152,202],"scenario":[153],"advice-based":[155,189],"baselines,":[156],"without":[159],"agents,":[161],"three":[163],"benchmark":[164],"tasks:":[165],"Cart-Pole,":[166],"grid-based":[168],"Multi-Team":[169],"Predator-Prey":[170],"(mt-pp)":[171],"Half":[173],"Field":[174],"Offense":[175],"(HFO).":[176],"Our":[177],"results":[178],"show":[179],"achieve":[182],"overall":[183],"comparable":[184],"when":[186],"compared":[187],"baselines":[190],"while":[191],"not":[192],"requiring":[193],"any":[194],"input":[196],"nor":[197],"threshold":[198],"tuning.":[199],"outperforms":[201],"improvement":[205],"related":[206],"complexity":[209],"addressed.":[213]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-14T08:36:36.166977","created_date":"2025-10-10T00:00:00"}
