{"id":"https://openalex.org/W4415123975","doi":"https://doi.org/10.1109/tits.2025.3611296","title":"Multi-Option Hierarchical Reinforcement Learning Framework With State Segmentation for Mixed On-Ramp Merging","display_name":"Multi-Option Hierarchical Reinforcement Learning Framework With State Segmentation for Mixed On-Ramp Merging","publication_year":2025,"publication_date":"2025-10-13","ids":{"openalex":"https://openalex.org/W4415123975","doi":"https://doi.org/10.1109/tits.2025.3611296"},"language":"en","primary_location":{"id":"doi:10.1109/tits.2025.3611296","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tits.2025.3611296","pdf_url":null,"source":{"id":"https://openalex.org/S144771191","display_name":"IEEE Transactions on Intelligent Transportation Systems","issn_l":"1524-9050","issn":["1524-9050","1558-0016"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Transportation Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102616706","display_name":"Zoutao Wen","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zoutao Wen","raw_affiliation_strings":["School of Information and Electronics, Beijing Institute of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-7233-9817","affiliations":[{"raw_affiliation_string":"School of Information and Electronics, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100751506","display_name":"Huachun Tan","orcid":"https://orcid.org/0000-0001-5968-103X"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huachun Tan","raw_affiliation_strings":["School of Interdisciplinary Science, Beijing Institute of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5968-103X","affiliations":[{"raw_affiliation_string":"School of Interdisciplinary Science, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024997323","display_name":"Yanan Zhao","orcid":"https://orcid.org/0000-0002-5549-5798"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanan Zhao","raw_affiliation_strings":["School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-5549-5798","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377402","display_name":"Hailong Zhang","orcid":"https://orcid.org/0000-0002-1764-0392"},"institutions":[{"id":"https://openalex.org/I135714990","display_name":"North University of China","ror":"https://ror.org/047bp1713","country_code":"CN","type":"education","lineage":["https://openalex.org/I135714990"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hailong Zhang","raw_affiliation_strings":["School of Mechanical Engineering, North University of China, Taiyuan, China"],"raw_orcid":"https://orcid.org/0000-0002-1764-0392","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, North University of China, Taiyuan, China","institution_ids":["https://openalex.org/I135714990"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092031092","display_name":"Peifeng Li","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peifeng Li","raw_affiliation_strings":["School of Information and Electronics, Beijing Institute of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-4969-4334","affiliations":[{"raw_affiliation_string":"School of Information and Electronics, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060220624","display_name":"Xinguo Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinguo Chen","raw_affiliation_strings":["Chinese Academy of Sciences, Institute of Software, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Software, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044921383","display_name":"Bolin Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bolin Gao","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-5582-7289","affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102616706"],"corresponding_institution_ids":["https://openalex.org/I125839683"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39309595,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"26","issue":"12","first_page":"22246","last_page":"22261"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12782","display_name":"Assembly Line Balancing Optimization","score":0.9412000179290771,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12782","display_name":"Assembly Line Balancing Optimization","score":0.9412000179290771,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8113999962806702},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.7070000171661377},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6553999781608582},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5630999803543091},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5612000226974487},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.5174000263214111},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.43950000405311584},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.3885999917984009}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8113999962806702},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.7070000171661377},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6775000095367432},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6553999781608582},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5630999803543091},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5612000226974487},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5601999759674072},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.5174000263214111},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5034000277519226},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.43950000405311584},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.3885999917984009},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.3723999857902527},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.3497999906539917},{"id":"https://openalex.org/C2987015589","wikidata":"https://www.wikidata.org/wiki/Q1040098","display_name":"Learning network","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C33724603","wikidata":"https://www.wikidata.org/wiki/Q812540","display_name":"Bayesian network","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C2984634286","wikidata":"https://www.wikidata.org/wiki/Q1331926","display_name":"Decision process","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28049999475479126},{"id":"https://openalex.org/C82142266","wikidata":"https://www.wikidata.org/wiki/Q3456604","display_name":"Dynamic Bayesian network","level":3,"score":0.26660001277923584},{"id":"https://openalex.org/C47796450","wikidata":"https://www.wikidata.org/wiki/Q508378","display_name":"Intelligent transportation system","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2597000002861023}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tits.2025.3611296","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tits.2025.3611296","pdf_url":null,"source":{"id":"https://openalex.org/S144771191","display_name":"IEEE Transactions on Intelligent Transportation Systems","issn_l":"1524-9050","issn":["1524-9050","1558-0016"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Transportation Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2000345695","display_name":null,"funder_award_id":"52402522","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3409751313","display_name":null,"funder_award_id":"2023YFB2504704-02","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G7887860652","display_name":null,"funder_award_id":"Z241100003524011","funder_id":"https://openalex.org/F4320335843","funder_display_name":"Beijing Science and Technology Planning Project"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335843","display_name":"Beijing Science and Technology Planning Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1965455100","https://openalex.org/W2109910161","https://openalex.org/W2120465407","https://openalex.org/W2145339207","https://openalex.org/W2296073425","https://openalex.org/W2963625099","https://openalex.org/W2964227312","https://openalex.org/W3023373481","https://openalex.org/W3044015199","https://openalex.org/W3048493514","https://openalex.org/W3090027660","https://openalex.org/W3119387319","https://openalex.org/W3120230705","https://openalex.org/W3120624913","https://openalex.org/W3121012756","https://openalex.org/W3132669183","https://openalex.org/W3138181334","https://openalex.org/W3142849873","https://openalex.org/W3168892396","https://openalex.org/W3179586425","https://openalex.org/W3185625797","https://openalex.org/W3187793874","https://openalex.org/W3193464862","https://openalex.org/W3205367325","https://openalex.org/W3206563773","https://openalex.org/W4205754139","https://openalex.org/W4220920561","https://openalex.org/W4283697860","https://openalex.org/W4312260978","https://openalex.org/W4379469740","https://openalex.org/W4381733133","https://openalex.org/W4386003447","https://openalex.org/W4387914680","https://openalex.org/W4390547633","https://openalex.org/W4391770610","https://openalex.org/W4391781515","https://openalex.org/W4392112282","https://openalex.org/W4400645868","https://openalex.org/W4402742688","https://openalex.org/W4405283472","https://openalex.org/W6922480057"],"related_works":[],"abstract_inverted_index":{"Deep":[0],"Reinforcement":[1,30],"Learning":[2,31],"(DRL)":[3],"has":[4],"achieved":[5],"significant":[6],"advancements":[7],"in":[8,49,66],"the":[9,41,47,94,98,115,134,145,163,176,187],"transportation":[10,69],"domain,":[11],"effectively":[12],"enhancing":[13],"traffic":[14],"network":[15],"efficiency,":[16],"reducing":[17],"pollutant":[18],"emissions,":[19],"and":[20,39,58,113,149,182,189],"improving":[21],"driving":[22],"safety.":[23],"A":[24],"prominent":[25],"approach":[26,178],"within":[27],"DRL,":[28],"Hierarchical":[29],"(HRL),":[32],"simplifies":[33],"complex":[34],"tasks":[35],"by":[36],"grouping":[37],"states":[38],"decomposing":[40],"Markov":[42],"Decision":[43],"Process":[44],"(MDP),":[45],"facilitating":[46],"exploration":[48],"multi-dimensional":[50],"state":[51,56,88],"spaces.":[52],"These":[53],"concepts":[54],"of":[55,101,141],"abstraction":[57,60],"temporal":[59],"prove":[61],"to":[62,120],"be":[63],"particularly":[64],"beneficial":[65],"complex,":[67],"high-risk":[68],"scenarios,":[70],"such":[71],"as":[72],"on-ramp":[73,123,158,190],"merging.":[74],"In":[75],"this":[76,78],"context,":[77],"paper":[79],"introduces":[80],"a":[81,121,155,184],"novel":[82],"Multi-Option":[83],"HRL":[84],"(MO-HRL)":[85],"framework":[86,96],"with":[87,104,151],"segmentation.":[89],"Unlike":[90],"traditional":[91],"option-based":[92],"HRL,":[93],"proposed":[95,177],"enables":[97],"simultaneous":[99],"activation":[100],"multiple":[102],"options,":[103],"each":[105],"option":[106],"observing":[107],"diverse":[108],"states.":[109],"After":[110],"carefully":[111],"defining":[112],"justifying":[114],"framework,":[116],"we":[117],"apply":[118],"MO-HRL":[119,135],"simplified":[122],"merging":[124,159],"scenario.":[125],"To":[126],"enhance":[127],"training,":[128],"curriculum":[129],"learning":[130],"is":[131,167],"incorporated":[132],"into":[133],"framework.":[136],"Extensive":[137],"experiments":[138],"involve":[139],"discussions":[140],"different":[142],"training":[143],"modes,":[144],"\u201cshared":[146],"critic\u201d":[147],"problem,":[148],"comparisons":[150],"state-of-the-art":[152],"baselines.":[153],"Additionally,":[154],"six-lane":[156],"mainline":[157,188],"scenario,":[160],"based":[161],"on":[162],"NGSIM":[164],"I-80":[165],"dataset,":[166],"constructed.":[168],"Simulation":[169],"results":[170],"from":[171],"both":[172],"scenarios":[173],"show":[174],"that":[175],"outperforms":[179],"existing":[180],"methods":[181],"maintains":[183],"balance":[185],"between":[186],"traffic.":[191]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-14T00:00:00"}
