{"id":"https://openalex.org/W4414093284","doi":"https://doi.org/10.3390/make7030098","title":"MCTS-Based Policy Improvement for Reinforcement Learning","display_name":"MCTS-Based Policy Improvement for Reinforcement Learning","publication_year":2025,"publication_date":"2025-09-10","ids":{"openalex":"https://openalex.org/W4414093284","doi":"https://doi.org/10.3390/make7030098"},"language":"en","primary_location":{"id":"doi:10.3390/make7030098","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7030098","pdf_url":"https://www.mdpi.com/2504-4990/7/3/98/pdf?version=1757492728","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-4990/7/3/98/pdf?version=1757492728","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116405997","display_name":"Gy\u00f6rgy Csipp\u00e1n","orcid":null},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Gy\u00f6rgy Csipp\u00e1n","raw_affiliation_strings":["Asura Technologies Ltd., H-1122 Budapest, Hungary","Department of Control for Transportation and Vehicle Systems, Faculty of Transportation Engineering and Vehicle Engineering, Budapest University of Technology and Economics, H-1111 Budapest, Hungary"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Asura Technologies Ltd., H-1122 Budapest, Hungary","institution_ids":[]},{"raw_affiliation_string":"Department of Control for Transportation and Vehicle Systems, Faculty of Transportation Engineering and Vehicle Engineering, Budapest University of Technology and Economics, H-1111 Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109870644","display_name":"Istv\u00e1n P\u00e9ter","orcid":"https://orcid.org/0009-0009-8651-2018"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Istv\u00e1n P\u00e9ter","raw_affiliation_strings":["Asura Technologies Ltd., H-1122 Budapest, Hungary","Department of Control for Transportation and Vehicle Systems, Faculty of Transportation Engineering and Vehicle Engineering, Budapest University of Technology and Economics, H-1111 Budapest, Hungary"],"raw_orcid":"https://orcid.org/0009-0009-8651-2018","affiliations":[{"raw_affiliation_string":"Asura Technologies Ltd., H-1122 Budapest, Hungary","institution_ids":[]},{"raw_affiliation_string":"Department of Control for Transportation and Vehicle Systems, Faculty of Transportation Engineering and Vehicle Engineering, Budapest University of Technology and Economics, H-1111 Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074225709","display_name":"B\u00e1lint K\u0151v\u00e1ri","orcid":"https://orcid.org/0000-0003-2178-2921"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"B\u00e1lint K\u0151v\u00e1ri","raw_affiliation_strings":["Asura Technologies Ltd., H-1122 Budapest, Hungary","Department of Control for Transportation and Vehicle Systems, Faculty of Transportation Engineering and Vehicle Engineering, Budapest University of Technology and Economics, H-1111 Budapest, Hungary"],"raw_orcid":"https://orcid.org/0000-0003-2178-2921","affiliations":[{"raw_affiliation_string":"Asura Technologies Ltd., H-1122 Budapest, Hungary","institution_ids":[]},{"raw_affiliation_string":"Department of Control for Transportation and Vehicle Systems, Faculty of Transportation Engineering and Vehicle Engineering, Budapest University of Technology and Economics, H-1111 Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000436758","display_name":"Tam\u00e1s B\u00e9csi","orcid":"https://orcid.org/0000-0002-1487-9672"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":true,"raw_author_name":"Tam\u00e1s B\u00e9csi","raw_affiliation_strings":["Department of Control for Transportation and Vehicle Systems, Faculty of Transportation Engineering and Vehicle Engineering, Budapest University of Technology and Economics, H-1111 Budapest, Hungary"],"raw_orcid":"https://orcid.org/0000-0002-1487-9672","affiliations":[{"raw_affiliation_string":"Department of Control for Transportation and Vehicle Systems, Faculty of Transportation Engineering and Vehicle Engineering, Budapest University of Technology and Economics, H-1111 Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5000436758"],"corresponding_institution_ids":["https://openalex.org/I29770179"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11481213,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"3","first_page":"98","last_page":"98"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10100","display_name":"Metaheuristic Optimization Algorithms Research","score":0.9724000096321106,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.863099992275238},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5874000191688538},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5723000168800354},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5496000051498413},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.47749999165534973},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4706999957561493},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.40209999680519104},{"id":"https://openalex.org/keywords/monte-carlo-tree-search","display_name":"Monte Carlo tree search","score":0.388700008392334}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.863099992275238},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7598999738693237},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6191999912261963},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5899999737739563},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5874000191688538},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5723000168800354},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5496000051498413},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.47749999165534973},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4706999957561493},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.40209999680519104},{"id":"https://openalex.org/C46149586","wikidata":"https://www.wikidata.org/wiki/Q11785332","display_name":"Monte Carlo tree search","level":3,"score":0.388700008392334},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3547999858856201},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.3513000011444092},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.2669999897480011},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.2596000134944916},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.25780001282691956}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/make7030098","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7030098","pdf_url":"https://www.mdpi.com/2504-4990/7/3/98/pdf?version=1757492728","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:e0d3f04ab08444fbb1a50a375e0bd7fe","is_oa":true,"landing_page_url":"https://doaj.org/article/e0d3f04ab08444fbb1a50a375e0bd7fe","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction, Vol 7, Iss 3, p 98 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/make7030098","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7030098","pdf_url":"https://www.mdpi.com/2504-4990/7/3/98/pdf?version=1757492728","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1688296686","display_name":null,"funder_award_id":"2024-2.1.1","funder_id":"https://openalex.org/F4320335908","funder_display_name":"Nemzeti Kutat\u00e1si, Fejleszt\u00e9si \u00e9s Innovaci\u00f3s Alap"},{"id":"https://openalex.org/G2745069999","display_name":null,"funder_award_id":"EK\u00d6P-24-4-I","funder_id":"https://openalex.org/F4320335908","funder_display_name":"Nemzeti Kutat\u00e1si, Fejleszt\u00e9si \u00e9s Innovaci\u00f3s Alap"},{"id":"https://openalex.org/G298428782","display_name":null,"funder_award_id":"TKP2021","funder_id":"https://openalex.org/F4320335908","funder_display_name":"Nemzeti Kutat\u00e1si, Fejleszt\u00e9si \u00e9s Innovaci\u00f3s Alap"},{"id":"https://openalex.org/G6388477364","display_name":null,"funder_award_id":"RRF-2.3.1-21-2022-00002","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G7841365756","display_name":null,"funder_award_id":"BME-NVA-02","funder_id":"https://openalex.org/F4320335908","funder_display_name":"Nemzeti Kutat\u00e1si, Fejleszt\u00e9si \u00e9s Innovaci\u00f3s Alap"},{"id":"https://openalex.org/G8092522236","display_name":null,"funder_award_id":"2024-2.1.1-EK\u00d6P","funder_id":"https://openalex.org/F4320335908","funder_display_name":"Nemzeti Kutat\u00e1si, Fejleszt\u00e9si \u00e9s Innovaci\u00f3s Alap"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320335908","display_name":"Nemzeti Kutat\u00e1si, Fejleszt\u00e9si \u00e9s Innovaci\u00f3s Alap","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4414093284.pdf","grobid_xml":"https://content.openalex.org/works/W4414093284.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W1625390266","https://openalex.org/W2145339207","https://openalex.org/W2257979135","https://openalex.org/W2296073425","https://openalex.org/W2885010347","https://openalex.org/W2965870268","https://openalex.org/W2989847975","https://openalex.org/W3012544020","https://openalex.org/W3118210634","https://openalex.org/W4302010773","https://openalex.org/W4313532196","https://openalex.org/W4379739787","https://openalex.org/W4399146394","https://openalex.org/W4401069931","https://openalex.org/W4402660437"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588"],"abstract_inverted_index":{"Curriculum":[0,57],"Learning":[1,9],"(CL)":[2],"is":[3],"a":[4,39,117,178],"potent":[5],"field":[6],"in":[7,56,82,184],"Machine":[8],"that":[10,47,111,150],"provides":[11],"several":[12],"excellent":[13],"techniques":[14],"for":[15,181],"enhancing":[16],"the":[17,20,24,30,52,66,71,77,96,129,166],"performance":[18],"of":[19,29,54,68,79,102,168],"training":[21,31,72],"process":[22,131],"given":[23],"same":[25],"data":[26],"points,":[27],"regardless":[28],"method":[32,81,105],"used.":[33],"In":[34],"this":[35],"research,":[36],"we":[37,75],"propose":[38],"novel":[40],"Monte":[41],"Carlo":[42],"Tree":[43],"Search":[44],"(MCTS)-based":[45],"technique":[46],"enhances":[48],"model":[49],"performance,":[50],"articulating":[51],"utilization":[53],"MCTS":[55,63,170],"Learning.":[58,187],"The":[59,163],"proposed":[60],"approach":[61,144],"leverages":[62],"to":[64,120,173],"optimize":[65,174],"sequence":[67],"batches":[69],"during":[70],"process.":[73],"First,":[74],"demonstrate":[76],"application":[78],"our":[80,104,143],"Reinforcement":[83,186],"Learning,":[84],"where":[85],"sparse":[86],"rewards":[87],"often":[88],"diminish":[89],"convergence":[90,136],"and":[91,99,108,115,137,160],"deteriorate":[92],"performance.":[93,140],"By":[94],"leveraging":[95],"strategic":[97],"planning":[98],"exploration":[100],"capabilities":[101],"MCTS,":[103],"systematically":[106],"identifies":[107],"selects":[109],"trajectories":[110],"are":[112],"more":[113],"informative":[114],"have":[116],"higher":[118],"potential":[119,167],"enhance":[121],"policy":[122,161],"improvement.":[123],"This":[124],"MCTS-guided":[125],"batch":[126,154,175],"optimization":[127],"focuses":[128],"learning":[130,158],"on":[132,145],"valuable":[133],"experiences,":[134],"accelerating":[135],"improving":[138],"overall":[139],"We":[141],"evaluate":[142],"standard":[146],"RL":[147],"benchmarks,":[148],"demonstrating":[149],"it":[151],"outperforms":[152],"conventional":[153],"selection":[155],"methods":[156],"regarding":[157],"speed":[159],"effectiveness.":[162],"results":[164],"highlight":[165],"combining":[169],"with":[171],"CL":[172],"selection,":[176],"offering":[177],"promising":[179],"direction":[180],"future":[182],"research":[183],"efficient":[185]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
