{"id":"https://openalex.org/W4387171816","doi":"https://doi.org/10.3233/faia230427","title":"Model-Based Reinforcement Learning with Multi-Step Plan Value Estimation","display_name":"Model-Based Reinforcement Learning with Multi-Step Plan Value Estimation","publication_year":2023,"publication_date":"2023-09-28","ids":{"openalex":"https://openalex.org/W4387171816","doi":"https://doi.org/10.3233/faia230427"},"language":"en","primary_location":{"id":"doi:10.3233/faia230427","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia230427","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA230427","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA230427","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005113658","display_name":"Haoxin Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoxin Lin","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China","Polixir Technologies, Nanjing, Jiangsu, China","National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China; Polixir Technologies, Nanjing, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I881766915"]},{"raw_affiliation_string":"Polixir Technologies, Nanjing, Jiangsu, China","institution_ids":[]},{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China; Polixir Technologies, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050836778","display_name":"Yihao Sun","orcid":"https://orcid.org/0000-0002-5829-8554"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yihao Sun","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020583839","display_name":"Jiaji Zhang","orcid":"https://orcid.org/0000-0002-4939-8676"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaji Zhang","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100458530","display_name":"Yang Yu","orcid":"https://orcid.org/0000-0002-8209-2898"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Yu","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China","Peng Cheng Laboratory, Shenzhen, Guangdong, China","Polixir Technologies, Nanjing, Jiangsu, China","National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China; Peng Cheng Laboratory, Shenzhen, Guangdong, China; Polixir Technologies, Nanjing, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I881766915"]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"Polixir Technologies, Nanjing, Jiangsu, China","institution_ids":[]},{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China; Peng Cheng Laboratory, Shenzhen, Guangdong, China; Polixir Technologies, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5005113658"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":3.1254,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.92797468,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9771999716758728,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9771999716758728,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9581999778747559,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9197999835014343,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8980803489685059},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7616215944290161},{"id":"https://openalex.org/keywords/plan","display_name":"Plan (archaeology)","score":0.5736266374588013},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5119982361793518},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5015237331390381},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4994332790374756},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.49463531374931335},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4778268337249756},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4742690324783325},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.46595603227615356},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.4502682685852051},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11757853627204895}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8980803489685059},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7616215944290161},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.5736266374588013},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5119982361793518},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5015237331390381},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4994332790374756},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.49463531374931335},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4778268337249756},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4742690324783325},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.46595603227615356},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.4502682685852051},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11757853627204895},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia230427","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia230427","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA230427","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia230427","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia230427","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA230427","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1614471940","display_name":null,"funder_award_id":"2020AAA0","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G168428553","display_name":null,"funder_award_id":"PCL2021A12","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4366907579","display_name":null,"funder_award_id":"2020AAA0107200","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G4856701993","display_name":"Improving Data Quality of Advanced LIGO Gravitational-Wave Searches","funder_award_id":"1921006","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G528746023","display_name":null,"funder_award_id":"61921006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4387171816.pdf"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W1491843047","https://openalex.org/W1771410628","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2173248099","https://openalex.org/W2593237273","https://openalex.org/W2732671178","https://openalex.org/W2743381431","https://openalex.org/W2781726626","https://openalex.org/W2786388471","https://openalex.org/W2789824229","https://openalex.org/W2804010078","https://openalex.org/W2804132768","https://openalex.org/W2805762288","https://openalex.org/W2810754397","https://openalex.org/W2859967432","https://openalex.org/W2877093712","https://openalex.org/W2899424034","https://openalex.org/W2923023063","https://openalex.org/W2946949757","https://openalex.org/W2947861305","https://openalex.org/W2950624398","https://openalex.org/W2962872206","https://openalex.org/W2962879844","https://openalex.org/W2963654596","https://openalex.org/W2963864421","https://openalex.org/W2996449210","https://openalex.org/W3034680299","https://openalex.org/W3092490845","https://openalex.org/W3121786643","https://openalex.org/W3156554472","https://openalex.org/W3178520484","https://openalex.org/W4210821597","https://openalex.org/W4226211093","https://openalex.org/W4281686999","https://openalex.org/W4282813355","https://openalex.org/W4287388225","https://openalex.org/W4288319859","https://openalex.org/W4289360636","https://openalex.org/W4298206671"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W20361778","https://openalex.org/W2024136090","https://openalex.org/W2348534359"],"abstract_inverted_index":{"A":[0],"promising":[1],"way":[2],"to":[3,26,47,58],"improve":[4],"the":[5,23,32,43,52,74,81,98,103,132],"sample":[6,139],"efficiency":[7,140],"of":[8,89,131],"reinforcement":[9,114],"learning":[10,115],"is":[11,148],"model-based":[12,70,113,143],"methods,":[13],"in":[14,22,42],"which":[15,79],"many":[16],"explorations":[17],"and":[18,96,135],"evaluations":[19],"can":[20],"happen":[21],"learned":[24,33,133],"models":[25],"save":[27],"real-world":[28],"samples.":[29],"However,":[30],"when":[31],"model":[34,38,44,134],"has":[35],"a":[36,87,93,128,137],"non-negligible":[37],"error,":[39],"sequential":[40],"steps":[41],"are":[45],"hard":[46],"be":[48],"accurately":[49],"evaluated,":[50],"limiting":[51],"model\u2019s":[53],"utilization.":[54],"This":[55],"paper":[56],"proposes":[57],"alleviate":[59],"this":[60],"issue":[61],"by":[62,100],"introducing":[63],"multi-step":[64,75,104],"plans":[65,91],"into":[66],"policy":[67,99,105],"optimization":[68],"for":[69],"RL.":[71],"We":[72],"employ":[73],"plan":[76,108],"value":[77,109],"estimation,":[78],"evaluates":[80],"expected":[82],"discounted":[83],"return":[84],"after":[85],"executing":[86],"sequence":[88],"action":[90],"at":[92,150],"given":[94],"state,":[95],"updates":[97],"directly":[101],"computing":[102],"gradient":[106],"via":[107],"estimation.":[110],"The":[111,146],"new":[112],"algorithm":[116],"MPPVE":[117],"(Model-based":[118],"Planning":[119],"Policy":[120],"Learning":[121],"with":[122],"Multi-step":[123],"Plan":[124],"Value":[125],"Estimation)":[126],"shows":[127],"better":[129,138],"utilization":[130],"achieves":[136],"than":[141],"state-of-the-art":[142],"RL":[144],"approaches.":[145],"code":[147],"available":[149],"https://github.com/HxLyn3/MPPVE.":[151]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
