{"id":"https://openalex.org/W101901138","doi":"https://doi.org/10.3233/978-1-58603-893-9-106","title":"Improving Batch Reinforcement Learning Performance through Transfer of Samples","display_name":"Improving Batch Reinforcement Learning Performance through Transfer of Samples","publication_year":2008,"publication_date":"2008-01-01","ids":{"openalex":"https://openalex.org/W101901138","doi":"https://doi.org/10.3233/978-1-58603-893-9-106","mag":"101901138"},"language":"en","primary_location":{"id":"doi:10.3233/978-1-58603-893-9-106","is_oa":false,"landing_page_url":"https://doi.org/10.3233/978-1-58603-893-9-106","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014791481","display_name":"Alessandro Lazaric","orcid":"https://orcid.org/0000-0002-8970-413X"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Lazaric Alessandro","raw_affiliation_strings":["Department of Electronics and Information, Politecnico di Milano, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electronics and Information, Politecnico di Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017130830","display_name":"Marcello Restelli","orcid":"https://orcid.org/0000-0002-6322-1076"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Restelli Marcello","raw_affiliation_strings":["Department of Electronics and Information, Politecnico di Milano, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electronics and Information, Politecnico di Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060263666","display_name":"Andrea Bonarini","orcid":"https://orcid.org/0000-0002-4880-4521"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Bonarini Andrea","raw_affiliation_strings":["Department of Electronics and Information, Politecnico di Milano, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electronics and Information, Politecnico di Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5014791481"],"corresponding_institution_ids":["https://openalex.org/I93860229"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02484472,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"106","last_page":"117"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10551","display_name":"Scheduling and Optimization Algorithms","score":0.9817000031471252,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6080459952354431},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5400190949440002},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.5062081217765808},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4116584062576294},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.22609689831733704},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.21246957778930664},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.10382309556007385}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6080459952354431},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5400190949440002},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.5062081217765808},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4116584062576294},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.22609689831733704},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.21246957778930664},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.10382309556007385}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3233/978-1-58603-893-9-106","is_oa":false,"landing_page_url":"https://doi.org/10.3233/978-1-58603-893-9-106","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},{"id":"pmh:oai:re.public.polimi.it:11311/549211","is_oa":false,"landing_page_url":"http://hdl.handle.net/11311/549211","pdf_url":null,"source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"mag:101901138","is_oa":false,"landing_page_url":"https://dblp.uni-trier.de/db/conf/stairs/stairs2008.html#LazaricRB08","pdf_url":null,"source":{"id":"https://openalex.org/S4306420941","display_name":"Starting AI Researchers' Symposium","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"Starting AI Researchers' Symposium","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6000000238418579,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1492014007","https://openalex.org/W1515851193","https://openalex.org/W2004030284","https://openalex.org/W2090170171","https://openalex.org/W2110292307","https://openalex.org/W2120346334","https://openalex.org/W2131831090","https://openalex.org/W2133040789","https://openalex.org/W2568646110","https://openalex.org/W3035219538"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856"],"abstract_inverted_index":{"The":[0],"main":[1,37],"objective":[2],"of":[3,13,17,31,35,48,110],"transfer":[4,47],"in":[5,130],"reinforcement":[6],"learning":[7,14,60,133],"is":[8,39,45,128],"to":[9,40,79,102,118],"reduce":[10,107],"the":[11,15,24,36,46,59,103,108,115,125,132,144],"complexity":[12],"solution":[16],"a":[18,29,67],"target":[19,80,104,116,145],"task":[20,117],"by":[21],"effectively":[22],"reusing":[23],"knowledge":[25,49],"retained":[26],"from":[27,77,95,114,143],"solving":[28],"set":[30],"source":[32,78,96,138],"tasks.":[33],"One":[34],"problems":[38],"avoid":[41],"negative":[42],"transfer,":[43],"that":[44,52,56,70,83,98,124],"across":[50],"tasks":[51,81,97,139],"are":[53,99,140],"significantly":[54,141],"different":[55,142],"may":[57],"worsen":[58],"performance.":[61],"In":[62],"this":[63],"paper,":[64],"we":[65,106],"introduce":[66],"novel":[68],"algorithm":[69],"selectively":[71],"transfers":[72],"samples":[73,94,111],"(i.e.,":[74],"tuples":[75],"&amp;lang;s,a,s&amp;prime;,r&amp;rang;)":[76],"and":[82],"uses":[84],"them":[85],"as":[86],"input":[87],"for":[88],"batch":[89],"reinforcement-learning":[90],"algorithms.":[91],"By":[92],"transferring":[93],"mostly":[100],"similar":[101],"task,":[105],"number":[109],"actually":[112],"collected":[113],"learn":[119],"its":[120],"solution.":[121],"We":[122],"show":[123],"proposed":[126],"approach":[127],"effective":[129],"reducing":[131],"complexity,":[134],"even":[135],"when":[136],"some":[137],"task.":[146]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2016-06-24T00:00:00"}
