{"id":"https://openalex.org/W4414538730","doi":"https://doi.org/10.1109/tvt.2025.3614880","title":"Reinforcement Learning Decision and Planning Algorithm Using Posterior Return Heuristic","display_name":"Reinforcement Learning Decision and Planning Algorithm Using Posterior Return Heuristic","publication_year":2025,"publication_date":"2025-09-26","ids":{"openalex":"https://openalex.org/W4414538730","doi":"https://doi.org/10.1109/tvt.2025.3614880"},"language":"en","primary_location":{"id":"doi:10.1109/tvt.2025.3614880","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvt.2025.3614880","pdf_url":null,"source":{"id":"https://openalex.org/S10936095","display_name":"IEEE Transactions on Vehicular Technology","issn_l":"0018-9545","issn":["0018-9545","1939-9359"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Vehicular Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103651217","display_name":"Zeming Ma","orcid":"https://orcid.org/0009-0002-0806-0617"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhengtang Ma","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang University, Urumqi, China"],"raw_orcid":"https://orcid.org/0009-0002-0806-0617","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang University, Urumqi, China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081426761","display_name":"Eksan Firkat","orcid":"https://orcid.org/0000-0003-0755-149X"},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I4210114105","display_name":"Tsinghua\u2013Berkeley Shenzhen Institute","ror":"https://ror.org/02hhwwz98","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210114105","https://openalex.org/I95457486","https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Eksan Firkat","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Shenzhen, China","Tsinghua Shenzhen International Graduate School and Research Associate, Great Bay University, China"],"raw_orcid":"https://orcid.org/0000-0003-0755-149X","affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I4210114105"]},{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School and Research Associate, Great Bay University, China","institution_ids":["https://openalex.org/I3131625388"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061318594","display_name":"Xiaming Yuan","orcid":"https://orcid.org/0000-0003-3039-6184"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaming Yuan","raw_affiliation_strings":["Department of Precision Instrument, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3039-6184","affiliations":[{"raw_affiliation_string":"Department of Precision Instrument, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025332713","display_name":"Yijian Duan","orcid":"https://orcid.org/0000-0002-6886-4759"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yijian Duan","raw_affiliation_strings":["Guangxi University, Guangxi, China"],"raw_orcid":"https://orcid.org/0000-0002-6886-4759","affiliations":[{"raw_affiliation_string":"Guangxi University, Guangxi, China","institution_ids":["https://openalex.org/I150807315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108047893","display_name":"Jihong Zhu","orcid":"https://orcid.org/0000-0001-6830-1211"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jihong Zhu","raw_affiliation_strings":["Department of Precision Instrument, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-6830-1211","affiliations":[{"raw_affiliation_string":"Department of Precision Instrument, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075133420","display_name":"Askar Hamdulla","orcid":"https://orcid.org/0000-0002-2321-308X"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Askar Hamdulla","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang University, Urumqi, China"],"raw_orcid":"https://orcid.org/0000-0002-2321-308X","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang University, Urumqi, China","institution_ids":["https://openalex.org/I96908189"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103651217"],"corresponding_institution_ids":["https://openalex.org/I96908189"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31547317,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"75","issue":"3","first_page":"3916","last_page":"3927"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.24580000340938568,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.24580000340938568,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12546","display_name":"Smart Parking Systems Research","score":0.2159000039100647,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8436999917030334},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.5907999873161316},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.5335000157356262},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.4968999922275543},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.44339999556541443},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.43880000710487366},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3995000123977661}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8436999917030334},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.5907999873161316},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5856999754905701},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5507000088691711},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.5335000157356262},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.504800021648407},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.4968999922275543},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.44339999556541443},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.43880000710487366},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3995000123977661},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.3718000054359436},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.31940001249313354},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.31619998812675476},{"id":"https://openalex.org/C57830394","wikidata":"https://www.wikidata.org/wiki/Q278079","display_name":"Posterior probability","level":3,"score":0.3122999966144562},{"id":"https://openalex.org/C64848388","wikidata":"https://www.wikidata.org/wiki/Q188867","display_name":"Futures studies","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.27480000257492065},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2687999904155731},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.26179999113082886},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.2551000118255615}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tvt.2025.3614880","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvt.2025.3614880","pdf_url":null,"source":{"id":"https://openalex.org/S10936095","display_name":"IEEE Transactions on Vehicular Technology","issn_l":"0018-9545","issn":["0018-9545","1939-9359"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Vehicular Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W192919555","https://openalex.org/W2117211893","https://openalex.org/W2121581165","https://openalex.org/W2126316555","https://openalex.org/W2134634502","https://openalex.org/W2145339207","https://openalex.org/W2163061173","https://openalex.org/W2336416123","https://openalex.org/W2766447205","https://openalex.org/W2793998795","https://openalex.org/W2892258706","https://openalex.org/W2905334533","https://openalex.org/W2966477753","https://openalex.org/W2966948900","https://openalex.org/W2967452881","https://openalex.org/W2968202530","https://openalex.org/W2989958156","https://openalex.org/W3000193750","https://openalex.org/W3002725938","https://openalex.org/W3010740942","https://openalex.org/W3090027660","https://openalex.org/W3107668387","https://openalex.org/W3117126067","https://openalex.org/W3119637750","https://openalex.org/W3120230705","https://openalex.org/W3124225140","https://openalex.org/W3129616587","https://openalex.org/W3148740559","https://openalex.org/W3158592412","https://openalex.org/W3201913099","https://openalex.org/W3210290940","https://openalex.org/W3214738088","https://openalex.org/W4285092328","https://openalex.org/W4285295684","https://openalex.org/W4381733136","https://openalex.org/W4386219277","https://openalex.org/W4405599060","https://openalex.org/W4407566067","https://openalex.org/W4411337166"],"related_works":[],"abstract_inverted_index":{"The":[0,72,98],"complexities":[1],"and":[2,41,69,106,116],"dynamics":[3],"of":[4,50,75],"modern":[5],"driving":[6,15],"environments":[7],"have":[8],"amplified":[9],"the":[10,48,63,67,70],"uncertainties":[11],"faced":[12],"by":[13,33],"autonomous":[14],"systems,":[16],"posing":[17],"significant":[18],"challenges":[19,49],"to":[20,38,61,91],"their":[21],"decision-making":[22,105,121],"processes.":[23],"This":[24],"paper":[25],"introduces":[26],"a":[27,56,87],"novel":[28],"reinforcement":[29,54],"learning":[30],"framework":[31],"inspired":[32],"posterior":[34,73],"return":[35],"estimation,":[36],"designed":[37],"enhance":[39],"safety":[40],"foresight":[42],"in":[43,53],"robotic":[44],"decision-making.":[45,97],"To":[46],"overcome":[47],"long-term":[51],"dependency":[52],"learning,":[55],"tree":[57],"structure":[58],"is":[59,80,101],"utilized":[60],"record":[62],"interaction":[64],"history":[65],"between":[66],"agent":[68],"environment.":[71],"distribution":[74,94],"returns":[76],"for":[77,95],"each":[78],"state":[79],"estimated":[81],"from":[82],"this":[83,93],"historical":[84],"data,":[85],"with":[86],"neural":[88],"network":[89],"employed":[90],"model":[92],"enhanced":[96],"framework's":[99],"efficacy":[100],"validated":[102],"through":[103],"two":[104],"planning":[107],"scenarios,":[108],"where":[109],"it":[110],"surpasses":[111],"several":[112],"commonly":[113],"used":[114],"frameworks":[115],"demonstrates":[117],"notably":[118],"more":[119],"anticipatory":[120],"patterns.":[122]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
