{"id":"https://openalex.org/W2020573190","doi":"https://doi.org/10.1109/tcyb.2014.2319733","title":"Stochastic Abstract Policies: Generalizing Knowledge to Improve Reinforcement Learning","display_name":"Stochastic Abstract Policies: Generalizing Knowledge to Improve Reinforcement Learning","publication_year":2014,"publication_date":"2014-05-13","ids":{"openalex":"https://openalex.org/W2020573190","doi":"https://doi.org/10.1109/tcyb.2014.2319733","mag":"2020573190","pmid":"https://pubmed.ncbi.nlm.nih.gov/24835233"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2014.2319733","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2014.2319733","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://dx.doi.org/10.1109/TCYB.2014.2319733","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071736011","display_name":"Marcelo Li Koga","orcid":null},"institutions":[{"id":"https://openalex.org/I17974374","display_name":"Universidade de S\u00e3o Paulo","ror":"https://ror.org/036rp1748","country_code":"BR","type":"education","lineage":["https://openalex.org/I17974374"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Marcelo L. Koga","raw_affiliation_strings":["Escola Polit\u00e9cnica, Universidade de S\u00e3o Paulo, S\u00e3o Paulo, SP, Brazil","Escola Polit\u00e9cnica, Universidade de S\u00e3o Paulo, S\u00e3o Paulo-SP, Brazil"],"affiliations":[{"raw_affiliation_string":"Escola Polit\u00e9cnica, Universidade de S\u00e3o Paulo, S\u00e3o Paulo, SP, Brazil","institution_ids":["https://openalex.org/I17974374"]},{"raw_affiliation_string":"Escola Polit\u00e9cnica, Universidade de S\u00e3o Paulo, S\u00e3o Paulo-SP, Brazil","institution_ids":["https://openalex.org/I17974374"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113919343","display_name":"Valdinei Freire","orcid":null},"institutions":[{"id":"https://openalex.org/I17974374","display_name":"Universidade de S\u00e3o Paulo","ror":"https://ror.org/036rp1748","country_code":"BR","type":"education","lineage":["https://openalex.org/I17974374"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Valdinei Freire","raw_affiliation_strings":["Escola de Artes, Ci\u00eancias e Humanidades, Universidade de S\u00e3o Paulo, S\u00e3o Paulo, SP, Brazil","Escola de Artes, Ci\u00eancias e Humanidades, Universidade de S\u00e3o Paulo, S\u00e3o Paulo, SP Brazil"],"affiliations":[{"raw_affiliation_string":"Escola de Artes, Ci\u00eancias e Humanidades, Universidade de S\u00e3o Paulo, S\u00e3o Paulo, SP, Brazil","institution_ids":["https://openalex.org/I17974374"]},{"raw_affiliation_string":"Escola de Artes, Ci\u00eancias e Humanidades, Universidade de S\u00e3o Paulo, S\u00e3o Paulo, SP Brazil","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069264027","display_name":"Anna Helena Reali Costa","orcid":"https://orcid.org/0000-0001-7309-4528"},"institutions":[{"id":"https://openalex.org/I17974374","display_name":"Universidade de S\u00e3o Paulo","ror":"https://ror.org/036rp1748","country_code":"BR","type":"education","lineage":["https://openalex.org/I17974374"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Anna H. R. Costa","raw_affiliation_strings":["Escola Polit\u00e9cnica, Universidade de S\u00e3o Paulo, S\u00e3o Paulo, SP, Brazil","Escola Polit\u00e9cnica, Universidade de S\u00e3o Paulo, S\u00e3o Paulo-SP, Brazil"],"affiliations":[{"raw_affiliation_string":"Escola Polit\u00e9cnica, Universidade de S\u00e3o Paulo, S\u00e3o Paulo, SP, Brazil","institution_ids":["https://openalex.org/I17974374"]},{"raw_affiliation_string":"Escola Polit\u00e9cnica, Universidade de S\u00e3o Paulo, S\u00e3o Paulo-SP, Brazil","institution_ids":["https://openalex.org/I17974374"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5071736011"],"corresponding_institution_ids":["https://openalex.org/I17974374"],"apc_list":null,"apc_paid":null,"fwci":5.0921,"has_fulltext":false,"cited_by_count":45,"citation_normalized_percentile":{"value":0.95359239,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"45","issue":"1","first_page":"77","last_page":"88"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8016995787620544},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7749752998352051},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7294691801071167},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6596540212631226},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6239528059959412},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.5888376235961914},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5662328600883484},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5485936403274536},{"id":"https://openalex.org/keywords/knowledge-transfer","display_name":"Knowledge transfer","score":0.5328662395477295},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.5144231915473938},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5036265254020691},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.46180978417396545},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.442105770111084},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.41258978843688965},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3669629693031311},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.24905019998550415},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11697494983673096},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07574468851089478}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8016995787620544},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7749752998352051},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7294691801071167},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6596540212631226},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6239528059959412},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.5888376235961914},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5662328600883484},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5485936403274536},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.5328662395477295},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.5144231915473938},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5036265254020691},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.46180978417396545},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.442105770111084},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.41258978843688965},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3669629693031311},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.24905019998550415},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11697494983673096},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07574468851089478},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008390","descriptor_name":"Markov Chains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008390","descriptor_name":"Markov Chains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008390","descriptor_name":"Markov Chains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008962","descriptor_name":"Models, Theoretical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008962","descriptor_name":"Models, Theoretical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008962","descriptor_name":"Models, Theoretical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D013269","descriptor_name":"Stochastic Processes","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D013269","descriptor_name":"Stochastic Processes","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D013269","descriptor_name":"Stochastic Processes","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1109/tcyb.2014.2319733","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2014.2319733","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:24835233","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/24835233","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null},{"id":"pmh:002664730","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1109/TCYB.2014.2319733","pdf_url":null,"source":{"id":"https://openalex.org/S4306402641","display_name":"LA Referencia (Red Federada de Repositorios Institucionales de Publicaciones Cient\u00edficas)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4383465926","host_organization_name":"LA Referencia","host_organization_lineage":["https://openalex.org/I4383465926"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"reponame:Reposit\u00f3rio Institucional da USP (Biblioteca Digital da Produ\u00e7\u00e3o Intelectual)","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:002664730","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1109/TCYB.2014.2319733","pdf_url":null,"source":{"id":"https://openalex.org/S4306402641","display_name":"LA Referencia (Red Federada de Repositorios Institucionales de Publicaciones Cient\u00edficas)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4383465926","host_organization_name":"LA Referencia","host_organization_lineage":["https://openalex.org/I4383465926"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"reponame:Reposit\u00f3rio Institucional da USP (Biblioteca Digital da Produ\u00e7\u00e3o Intelectual)","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1976427685","display_name":null,"funder_award_id":"Proc. 311058/2011-6","funder_id":"https://openalex.org/F4320322025","funder_display_name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico"},{"id":"https://openalex.org/G506067539","display_name":null,"funder_award_id":"Proc. 2011/19280-8","funder_id":"https://openalex.org/F4320320997","funder_display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo"},{"id":"https://openalex.org/G6909796275","display_name":null,"funder_award_id":"Proc. 2012/02190-9","funder_id":"https://openalex.org/F4320320997","funder_display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo"},{"id":"https://openalex.org/G7243449664","display_name":null,"funder_award_id":"Proc. 2012/19627-0","funder_id":"https://openalex.org/F4320320997","funder_display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo"}],"funders":[{"id":"https://openalex.org/F4320320997","display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo","ror":"https://ror.org/02ddkpn78"},{"id":"https://openalex.org/F4320322025","display_name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","ror":"https://ror.org/03swz6y49"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W929682","https://openalex.org/W193076044","https://openalex.org/W249650263","https://openalex.org/W1517613402","https://openalex.org/W1533597678","https://openalex.org/W1541084404","https://openalex.org/W1654728867","https://openalex.org/W1687873425","https://openalex.org/W1799762961","https://openalex.org/W1974043469","https://openalex.org/W2008809493","https://openalex.org/W2031677098","https://openalex.org/W2031727428","https://openalex.org/W2033072307","https://openalex.org/W2050630772","https://openalex.org/W2056584142","https://openalex.org/W2096600060","https://openalex.org/W2097381042","https://openalex.org/W2097498341","https://openalex.org/W2098723043","https://openalex.org/W2106953752","https://openalex.org/W2109910161","https://openalex.org/W2113300836","https://openalex.org/W2119567691","https://openalex.org/W2126834960","https://openalex.org/W2133040789","https://openalex.org/W2151285963","https://openalex.org/W2158618075","https://openalex.org/W2165698076","https://openalex.org/W2170164558","https://openalex.org/W2183728818","https://openalex.org/W2253637894","https://openalex.org/W2334782222","https://openalex.org/W2397240726","https://openalex.org/W3020831056","https://openalex.org/W3103256699","https://openalex.org/W6600033343","https://openalex.org/W6607829467","https://openalex.org/W6631055690","https://openalex.org/W6637046239","https://openalex.org/W6637463265","https://openalex.org/W6674600207","https://openalex.org/W6674664616","https://openalex.org/W6674887642","https://openalex.org/W6676297359","https://openalex.org/W6679818365","https://openalex.org/W6686054324","https://openalex.org/W6712173889","https://openalex.org/W6837117956"],"related_works":["https://openalex.org/W2475116013","https://openalex.org/W2770018148","https://openalex.org/W2358308169","https://openalex.org/W2385135707","https://openalex.org/W2140315382","https://openalex.org/W2059109728","https://openalex.org/W322691623","https://openalex.org/W2494989134","https://openalex.org/W2509444723","https://openalex.org/W2004958254"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,27,39,174,220],"(RL)":[2],"enables":[3],"an":[4,87,136],"agent":[5,218],"to":[6,28,90,139],"learn":[7],"behavior":[8,51,171],"by":[9,41,213],"acquiring":[10],"experience":[11],"through":[12],"trial-and-error":[13],"interactions":[14],"with":[15,111,192],"a":[16,32,56,63,103,112,117,124,163,196,207,221],"dynamic":[17],"environment.":[18],"However,":[19],"knowledge":[20,61,121,141],"is":[21,86],"usually":[22],"built":[23],"from":[24,52,62],"scratch":[25],"and":[26,94,116,167,176,200],"behave":[29],"may":[30],"take":[31],"long":[33],"time.":[34],"Here,":[35],"we":[36,74],"improve":[37],"the":[38,47,53,60,98,143,155,169,173,179,193,204,217],"performance":[40],"leveraging":[42],"prior":[43],"knowledge;":[44],"that":[45,76,81,109,203],"is,":[46],"learner":[48],"shows":[49],"proper":[50],"beginning":[54],"of":[55,65,101,105,185,195,198,206],"target":[57,222],"task,":[58],"using":[59,102],"set":[64],"known,":[66],"previously":[67],"solved,":[68],"source":[69,186],"tasks.":[70,131,158,187],"In":[71],"this":[72,95],"paper,":[73],"argue":[75],"building":[77],"stochastic":[78,125],"abstract":[79,126,133],"policies":[80,134],"generalize":[82],"over":[83],"past":[84],"experiences":[85],"effective":[88,137],"way":[89,138],"provide":[91,146],"such":[92],"improvement":[93],"generalization":[96],"outperforms":[97],"current":[99],"practice":[100],"library":[104,197],"policies.":[106],"We":[107,159,188],"achieve":[108],"contributing":[110],"new":[113,129],"algorithm,":[114],"AbsProb-PI-multiple":[115],"framework":[118],"for":[119,182],"transferring":[120],"represented":[122],"as":[123],"policy":[127,209],"in":[128,162],"RL":[130],"Stochastic":[132],"offer":[135],"encode":[140],"because":[142],"abstraction":[144],"they":[145],"not":[147],"only":[148],"generalizes":[149],"solutions":[150],"but":[151],"also":[152,177],"facilitates":[153],"extracting":[154],"similarities":[156],"among":[157],"perform":[160],"experiments":[161,201],"robotic":[164],"navigation":[165],"environment":[166],"analyze":[168],"agent's":[170],"throughout":[172],"process":[175],"assess":[178],"transfer":[180,194],"ratio":[181],"different":[183],"amounts":[184],"compare":[189],"our":[190],"method":[191],"policies,":[199],"show":[202],"use":[205],"generalized":[208],"produces":[210],"better":[211],"results":[212],"more":[214],"effectively":[215],"guiding":[216],"when":[219],"task.":[223]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
