{"id":"https://openalex.org/W2400318990","doi":"https://doi.org/10.1109/tcyb.2015.2483780","title":"Policy Search for the Optimal Control of Markov Decision Processes: A Novel Particle-Based Iterative Scheme","display_name":"Policy Search for the Optimal Control of Markov Decision Processes: A Novel Particle-Based Iterative Scheme","publication_year":2015,"publication_date":"2015-10-23","ids":{"openalex":"https://openalex.org/W2400318990","doi":"https://doi.org/10.1109/tcyb.2015.2483780","mag":"2400318990","pmid":"https://pubmed.ncbi.nlm.nih.gov/26513816"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2015.2483780","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2015.2483780","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/11311/1006220","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035756421","display_name":"Giorgio Manganini","orcid":"https://orcid.org/0000-0002-5394-4094"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Giorgio Manganini","raw_affiliation_strings":["Dipartimento di Elettronica, Politecnico di Milano, Milano, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Elettronica, Politecnico di Milano, Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091526684","display_name":"Matteo Pirotta","orcid":null},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Matteo Pirotta","raw_affiliation_strings":["Dipartimento di Elettronica, Politecnico di Milano, Milano, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Elettronica, Politecnico di Milano, Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017130830","display_name":"Marcello Restelli","orcid":"https://orcid.org/0000-0002-6322-1076"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marcello Restelli","raw_affiliation_strings":["Dipartimento di Elettronica, Politecnico di Milano, Milano, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Elettronica, Politecnico di Milano, Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070453413","display_name":"Luigi Piroddi","orcid":"https://orcid.org/0000-0002-6063-8060"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Luigi Piroddi","raw_affiliation_strings":["Dipartimento di Elettronica, Politecnico di Milano, Milano, Italy"],"raw_orcid":"https://orcid.org/0000-0002-6063-8060","affiliations":[{"raw_affiliation_string":"Dipartimento di Elettronica, Politecnico di Milano, Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060251698","display_name":"Maria Prandini","orcid":"https://orcid.org/0000-0001-7443-4117"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Maria Prandini","raw_affiliation_strings":["Dipartimento di Elettronica, Politecnico di Milano, Milano, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dipartimento di Elettronica, Politecnico di Milano, Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.1162,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.93176787,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"46","issue":"11","first_page":"2643","last_page":"2655"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.8338327407836914},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7096143364906311},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6514549255371094},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6396054625511169},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.5885143280029297},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.5751466751098633},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5103294253349304},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.460800439119339},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4557151794433594},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4257183074951172},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.42227429151535034},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.35776716470718384},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.280353844165802},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.27448999881744385},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2165757119655609},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08383980393409729}],"concepts":[{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.8338327407836914},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7096143364906311},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6514549255371094},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6396054625511169},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.5885143280029297},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.5751466751098633},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5103294253349304},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.460800439119339},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4557151794433594},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4257183074951172},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.42227429151535034},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.35776716470718384},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.280353844165802},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27448999881744385},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2165757119655609},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08383980393409729},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tcyb.2015.2483780","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2015.2483780","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:26513816","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/26513816","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null},{"id":"pmh:oai:re.public.polimi.it:11311/1006220","is_oa":true,"landing_page_url":"http://hdl.handle.net/11311/1006220","pdf_url":null,"source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:re.public.polimi.it:11311/1006220","is_oa":true,"landing_page_url":"http://hdl.handle.net/11311/1006220","pdf_url":null,"source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7400000095367432}],"awards":[{"id":"https://openalex.org/G4449215989","display_name":null,"funder_award_id":"643921","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1502364872","https://openalex.org/W1521939022","https://openalex.org/W1529558080","https://openalex.org/W1584101032","https://openalex.org/W1594783240","https://openalex.org/W1601081659","https://openalex.org/W1626155273","https://openalex.org/W1681245057","https://openalex.org/W1824914642","https://openalex.org/W1964534398","https://openalex.org/W1977655452","https://openalex.org/W2009303086","https://openalex.org/W2010152647","https://openalex.org/W2012587148","https://openalex.org/W2015482176","https://openalex.org/W2018726596","https://openalex.org/W2046376809","https://openalex.org/W2071676345","https://openalex.org/W2073384958","https://openalex.org/W2074603479","https://openalex.org/W2078977470","https://openalex.org/W2080813827","https://openalex.org/W2092924973","https://openalex.org/W2098432798","https://openalex.org/W2108051346","https://openalex.org/W2119567691","https://openalex.org/W2119717200","https://openalex.org/W2120346334","https://openalex.org/W2121863487","https://openalex.org/W2124477018","https://openalex.org/W2125612430","https://openalex.org/W2130105540","https://openalex.org/W2137092694","https://openalex.org/W2137104525","https://openalex.org/W2139053308","https://openalex.org/W2148053762","https://openalex.org/W2148642357","https://openalex.org/W2151416233","https://openalex.org/W2153874061","https://openalex.org/W2156737235","https://openalex.org/W2158738729","https://openalex.org/W2165726932","https://openalex.org/W2334782222","https://openalex.org/W2476930474","https://openalex.org/W2487144912","https://openalex.org/W2998589874","https://openalex.org/W4205513846","https://openalex.org/W4211221179","https://openalex.org/W4214717370","https://openalex.org/W4299401133","https://openalex.org/W6635508374","https://openalex.org/W6677737365","https://openalex.org/W6682074005","https://openalex.org/W6682998330","https://openalex.org/W6683195989","https://openalex.org/W6683356630"],"related_works":["https://openalex.org/W2049472152","https://openalex.org/W2807018115","https://openalex.org/W4388236136","https://openalex.org/W2315999538","https://openalex.org/W4200250224","https://openalex.org/W2606546905","https://openalex.org/W2126560268","https://openalex.org/W2285658092","https://openalex.org/W187740018","https://openalex.org/W2482498454"],"abstract_inverted_index":{"Classical":[0],"approximate":[1],"dynamic":[2],"programming":[3],"techniques":[4,17],"based":[5],"on":[6,41],"state-space":[7,160],"gridding":[8],"become":[9],"computationally":[10],"impracticable":[11],"for":[12,27],"high-dimensional":[13],"problems.":[14],"Policy":[15],"search":[16],"cope":[18],"with":[19,90,108],"this":[20],"curse":[21],"of":[22,44,74,101,113,119,143,151,158],"dimensionality":[23,157],"issue":[24],"by":[25,99],"searching":[26],"the":[28,42,59,62,66,75,91,136,149,152,156,159],"optimal":[29],"control":[30],"policy":[31,36,52,95,105,127,137],"in":[32],"a":[33,50,72,82,94,102],"restricted":[34],"parameterized":[35],"space.":[37],"We":[38],"here":[39,122],"focus":[40],"case":[43],"discrete":[45],"action":[46,67],"space":[47,64,77],"and":[48,87,139],"introduce":[49],"novel":[51],"parametrization":[53],"that":[54,78,130],"adopts":[55],"particles":[56,92,120,133],"to":[57,65,134],"describe":[58],"map":[60],"from":[61],"state":[63,76],"space,":[68],"each":[69],"particle":[70],"representing":[71],"region":[73],"is":[79,121,140],"mapped":[80],"into":[81],"certain":[83],"action.":[84],"The":[85,111],"locations":[86],"actions":[88],"associated":[89],"describing":[93],"can":[96],"be":[97],"tuned":[98],"means":[100],"recently":[103],"introduced":[104],"gradient":[106],"method":[107],"parameter-based":[109],"exploration.":[110],"task":[112],"selecting":[114],"an":[115,125],"appropriately":[116],"sized":[117],"set":[118],"solved":[123],"through":[124],"iterative":[126],"building":[128],"scheme":[129],"adds":[131],"new":[132],"improve":[135],"performance":[138],"also":[141],"capable":[142],"removing":[144],"redundant":[145],"particles.":[146],"Experiments":[147],"demonstrate":[148],"scalability":[150],"proposed":[153],"approach":[154],"as":[155],"grows.":[161]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
