{"id":"https://openalex.org/W4309675654","doi":"https://doi.org/10.1109/kse56063.2022.9953750","title":"Distill Knowledge in Multi-task Reinforcement Learning with Optimal-Transport Regularization","display_name":"Distill Knowledge in Multi-task Reinforcement Learning with Optimal-Transport Regularization","publication_year":2022,"publication_date":"2022-10-19","ids":{"openalex":"https://openalex.org/W4309675654","doi":"https://doi.org/10.1109/kse56063.2022.9953750"},"language":"en","primary_location":{"id":"doi:10.1109/kse56063.2022.9953750","is_oa":false,"landing_page_url":"https://doi.org/10.1109/kse56063.2022.9953750","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 14th International Conference on Knowledge and Systems Engineering (KSE)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2309.15603","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077010288","display_name":"Bang Giang Le","orcid":null},"institutions":[{"id":"https://openalex.org/I67868205","display_name":"VNU University of Science","ror":"https://ror.org/05w54hk79","country_code":"VN","type":"education","lineage":["https://openalex.org/I177233841","https://openalex.org/I67868205"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Bang Giang Le","raw_affiliation_strings":["VNU University of Engineering and Technology,HMI Lab,Hanoi,Vietnam","HMI Lab, VNU University of Engineering and Technology, Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"VNU University of Engineering and Technology,HMI Lab,Hanoi,Vietnam","institution_ids":["https://openalex.org/I67868205"]},{"raw_affiliation_string":"HMI Lab, VNU University of Engineering and Technology, Hanoi, Vietnam","institution_ids":["https://openalex.org/I67868205"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053935756","display_name":"Viet Cuong Ta","orcid":"https://orcid.org/0000-0001-8058-5915"},"institutions":[{"id":"https://openalex.org/I67868205","display_name":"VNU University of Science","ror":"https://ror.org/05w54hk79","country_code":"VN","type":"education","lineage":["https://openalex.org/I177233841","https://openalex.org/I67868205"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Viet Cuong Ta","raw_affiliation_strings":["VNU University of Engineering and Technology,HMI Lab,Hanoi,Vietnam","HMI Lab, VNU University of Engineering and Technology, Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"VNU University of Engineering and Technology,HMI Lab,Hanoi,Vietnam","institution_ids":["https://openalex.org/I67868205"]},{"raw_affiliation_string":"HMI Lab, VNU University of Engineering and Technology, Hanoi, Vietnam","institution_ids":["https://openalex.org/I67868205"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5077010288"],"corresponding_institution_ids":["https://openalex.org/I67868205"],"apc_list":null,"apc_paid":null,"fwci":0.1379,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.55744417,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"23","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7869967222213745},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7540819048881531},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.7249197363853455},{"id":"https://openalex.org/keywords/kullback\u2013leibler-divergence","display_name":"Kullback\u2013Leibler divergence","score":0.5642340183258057},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.5539742708206177},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5489693880081177},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5430071949958801},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5259447693824768},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.5146883130073547},{"id":"https://openalex.org/keywords/multi-task-learning","display_name":"Multi-task learning","score":0.46455901861190796},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4185038208961487},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13492897152900696}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7869967222213745},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7540819048881531},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.7249197363853455},{"id":"https://openalex.org/C171752962","wikidata":"https://www.wikidata.org/wiki/Q255166","display_name":"Kullback\u2013Leibler divergence","level":2,"score":0.5642340183258057},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.5539742708206177},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5489693880081177},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5430071949958801},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5259447693824768},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.5146883130073547},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.46455901861190796},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4185038208961487},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13492897152900696},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/kse56063.2022.9953750","is_oa":false,"landing_page_url":"https://doi.org/10.1109/kse56063.2022.9953750","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 14th International Conference on Knowledge and Systems Engineering (KSE)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2309.15603","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.15603","pdf_url":"https://arxiv.org/pdf/2309.15603","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2309.15603","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.15603","pdf_url":"https://arxiv.org/pdf/2309.15603","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4309675654.pdf","grobid_xml":"https://content.openalex.org/works/W4309675654.grobid-xml"},"referenced_works_count":60,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1771410628","https://openalex.org/W1777239053","https://openalex.org/W2145339207","https://openalex.org/W2158131535","https://openalex.org/W2735995851","https://openalex.org/W2736601468","https://openalex.org/W2780057514","https://openalex.org/W2781726626","https://openalex.org/W2787938642","https://openalex.org/W2803180393","https://openalex.org/W2963267001","https://openalex.org/W2963674921","https://openalex.org/W2964615009","https://openalex.org/W2991355586","https://openalex.org/W2995481444","https://openalex.org/W3033602179","https://openalex.org/W3080813135","https://openalex.org/W3097907450","https://openalex.org/W3106539628","https://openalex.org/W3130956863","https://openalex.org/W3205046940","https://openalex.org/W3212887262","https://openalex.org/W4214717370","https://openalex.org/W4226217245","https://openalex.org/W4287329474","https://openalex.org/W4288359673","https://openalex.org/W4297797010","https://openalex.org/W4299434508","https://openalex.org/W4300799055","https://openalex.org/W4311565450","https://openalex.org/W4327653225","https://openalex.org/W4394662669","https://openalex.org/W4394670483","https://openalex.org/W6638018090","https://openalex.org/W6638088447","https://openalex.org/W6677067356","https://openalex.org/W6682962330","https://openalex.org/W6713603661","https://openalex.org/W6715102896","https://openalex.org/W6732837357","https://openalex.org/W6740801417","https://openalex.org/W6740879895","https://openalex.org/W6741002519","https://openalex.org/W6746825387","https://openalex.org/W6747473740","https://openalex.org/W6748638692","https://openalex.org/W6748839928","https://openalex.org/W6751617248","https://openalex.org/W6757021410","https://openalex.org/W6771270455","https://openalex.org/W6771876938","https://openalex.org/W6779381009","https://openalex.org/W6779669310","https://openalex.org/W6782414780","https://openalex.org/W6785353143","https://openalex.org/W6788422526","https://openalex.org/W6790190981","https://openalex.org/W6801798027","https://openalex.org/W6810766738"],"related_works":["https://openalex.org/W2105321464","https://openalex.org/W2887774187","https://openalex.org/W2388220555","https://openalex.org/W3048739257","https://openalex.org/W1665563134","https://openalex.org/W2963604926","https://openalex.org/W1616881371","https://openalex.org/W1520875569","https://openalex.org/W2199957582","https://openalex.org/W2033178790"],"abstract_inverted_index":{"In":[0,56],"multi-task":[1,148],"reinforcement":[2],"learning,":[3],"it":[4],"is":[5,94],"possible":[6],"to":[7,44,53,101,117,135],"improve":[8],"the":[9,25,34,46,54,61,65,76,82,87,103,119,122,138],"data":[10],"efficiency":[11],"of":[12,48,63,90,105,121,141],"training":[13],"agents":[14,142],"by":[15],"transferring":[16],"knowledge":[17,49],"from":[18,27,50],"other":[19],"different":[20,28],"but":[21],"related":[22],"tasks.":[23,91],"Because":[24],"experiences":[26],"tasks":[29],"are":[30,133],"usually":[31],"biased":[32],"toward":[33],"specific":[35],"task":[36,52],"goals.":[37],"Traditional":[38],"methods":[39],"rely":[40],"on":[41,112,147],"Kullback-Leibler":[42,66],"regularization":[43],"stabilize":[45],"transfer":[47],"one":[51],"others.":[55],"this":[57],"work,":[58],"we":[59,79],"explore":[60],"direction":[62],"replacing":[64],"divergence":[67],"with":[68],"a":[69],"novel":[70],"Optimal":[71,83,130],"transport-based":[72,131],"regularization.":[73],"By":[74],"using":[75],"Sinkhorn":[77],"mapping,":[78],"can":[80],"approximate":[81],"transport":[84],"distance":[85,93],"between":[86],"state":[88],"distribution":[89],"The":[92,124],"then":[95],"used":[96],"as":[97],"an":[98],"amortized":[99],"reward":[100],"regularize":[102],"amount":[104],"sharing":[106],"information.":[107],"We":[108],"experiment":[109],"our":[110,128],"frameworks":[111],"several":[113,145],"grid-based":[114],"navigation":[115],"multi-goal":[116],"validate":[118],"effectiveness":[120],"approach.":[123],"results":[125],"show":[126],"that":[127],"added":[129],"rewards":[132],"able":[134],"speed":[136],"up":[137],"learning":[139],"process":[140],"and":[143],"outperforms":[144],"baselines":[146],"learning.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
