{"id":"https://openalex.org/W4407404519","doi":"https://doi.org/10.1109/tvt.2025.3541401","title":"Bilevel Multi-Armed Bandit-Based Hierarchical Reinforcement Learning for Interaction-Aware Self-Driving at Unsignalized Intersections","display_name":"Bilevel Multi-Armed Bandit-Based Hierarchical Reinforcement Learning for Interaction-Aware Self-Driving at Unsignalized Intersections","publication_year":2025,"publication_date":"2025-02-12","ids":{"openalex":"https://openalex.org/W4407404519","doi":"https://doi.org/10.1109/tvt.2025.3541401"},"language":"en","primary_location":{"id":"doi:10.1109/tvt.2025.3541401","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvt.2025.3541401","pdf_url":null,"source":{"id":"https://openalex.org/S10936095","display_name":"IEEE Transactions on Vehicular Technology","issn_l":"0018-9545","issn":["0018-9545","1939-9359"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Vehicular Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102764782","display_name":"Zengqi Peng","orcid":"https://orcid.org/0009-0001-1801-4297"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zengqi Peng","raw_affiliation_strings":["Robotics and Autonomous Systems Thrust, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"raw_orcid":"https://orcid.org/0009-0001-1801-4297","affiliations":[{"raw_affiliation_string":"Robotics and Autonomous Systems Thrust, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yubin Wang","orcid":"https://orcid.org/0000-0002-0129-7294"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yubin Wang","raw_affiliation_strings":["Robotics and Autonomous Systems Thrust, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-0129-7294","affiliations":[{"raw_affiliation_string":"Robotics and Autonomous Systems Thrust, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028544958","display_name":"Lei Zheng","orcid":"https://orcid.org/0000-0002-8603-0096"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei Zheng","raw_affiliation_strings":["Robotics and Autonomous Systems Thrust, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-8603-0096","affiliations":[{"raw_affiliation_string":"Robotics and Autonomous Systems Thrust, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053340150","display_name":"Jun Ma","orcid":"https://orcid.org/0000-0002-9405-8232"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jun Ma","raw_affiliation_strings":["Robotics and Autonomous Systems Thrust, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-9405-8232","affiliations":[{"raw_affiliation_string":"Robotics and Autonomous Systems Thrust, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102764782"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.1091,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.84935803,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"74","issue":"6","first_page":"8824","last_page":"8838"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9837999939918518,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bilevel-optimization","display_name":"Bilevel optimization","score":0.7339515686035156},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7162949442863464},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5277795791625977},{"id":"https://openalex.org/keywords/transport-engineering","display_name":"Transport engineering","score":0.5110586881637573},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.49196431040763855},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.3860471248626709},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3262152671813965},{"id":"https://openalex.org/keywords/simulation","display_name":"Simulation","score":0.32289013266563416},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25849229097366333},{"id":"https://openalex.org/keywords/structural-engineering","display_name":"Structural engineering","score":0.1403263509273529},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.08646145462989807},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.0792762041091919}],"concepts":[{"id":"https://openalex.org/C3309286","wikidata":"https://www.wikidata.org/wiki/Q4907693","display_name":"Bilevel optimization","level":3,"score":0.7339515686035156},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7162949442863464},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5277795791625977},{"id":"https://openalex.org/C22212356","wikidata":"https://www.wikidata.org/wiki/Q775325","display_name":"Transport engineering","level":1,"score":0.5110586881637573},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.49196431040763855},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.3860471248626709},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3262152671813965},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.32289013266563416},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25849229097366333},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.1403263509273529},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.08646145462989807},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.0792762041091919}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tvt.2025.3541401","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvt.2025.3541401","pdf_url":null,"source":{"id":"https://openalex.org/S10936095","display_name":"IEEE Transactions on Vehicular Technology","issn_l":"0018-9545","issn":["0018-9545","1939-9359"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Vehicular Technology","raw_type":"journal-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-146728","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-146728","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5600000023841858,"display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G7804376327","display_name":null,"funder_award_id":"62303390","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1701438256","https://openalex.org/W1977565586","https://openalex.org/W2077902449","https://openalex.org/W2296073425","https://openalex.org/W2343568200","https://openalex.org/W2559190048","https://openalex.org/W2842089854","https://openalex.org/W2897522585","https://openalex.org/W2903212089","https://openalex.org/W2922458387","https://openalex.org/W2951360122","https://openalex.org/W2989730386","https://openalex.org/W2997958396","https://openalex.org/W3088218629","https://openalex.org/W3099089609","https://openalex.org/W3121095832","https://openalex.org/W3181376804","https://openalex.org/W3190903164","https://openalex.org/W3204990527","https://openalex.org/W3205257000","https://openalex.org/W3208122016","https://openalex.org/W3209562871","https://openalex.org/W4200630212","https://openalex.org/W4206275166","https://openalex.org/W4226170924","https://openalex.org/W4285230563","https://openalex.org/W4297095020","https://openalex.org/W4313142062","https://openalex.org/W4313534875","https://openalex.org/W4381733133","https://openalex.org/W4381733201","https://openalex.org/W4382998910","https://openalex.org/W4386412395","https://openalex.org/W4387885597","https://openalex.org/W4391769604","https://openalex.org/W4391936140","https://openalex.org/W4394863034","https://openalex.org/W4401415702","https://openalex.org/W4405787549","https://openalex.org/W6745935785"],"related_works":["https://openalex.org/W2479207418","https://openalex.org/W4237041411","https://openalex.org/W1588628884","https://openalex.org/W1994745260","https://openalex.org/W2382404424","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2012267561","https://openalex.org/W3018909868","https://openalex.org/W2920061524"],"abstract_inverted_index":{"In":[0],"this":[1],"work,":[2],"we":[3],"present":[4],"BiM-ACPPO,":[5],"a":[6,104],"bilevel":[7,105],"multi-armed":[8],"bandit-based":[9],"hierarchical":[10],"reinforcement":[11],"learning":[12,107],"framework":[13],"for":[14,67],"interaction-aware":[15,65],"decision-making":[16],"and":[17,45,74,147],"planning":[18],"at":[19,91],"unsignalized":[20,92],"intersections.":[21],"Essentially,":[22],"it":[23],"proactively":[24],"takes":[25],"the":[26,40,46,58,77,81,86,94,98,113,122,130,134,143,148,173,178],"uncertainties":[27],"associated":[28],"with":[29],"surrounding":[30],"vehicles":[31],"(SVs)":[32],"into":[33],"consideration,":[34],"which":[35,109],"encompass":[36],"those":[37],"stemming":[38],"from":[39],"driver's":[41],"intention,":[42],"interactive":[43],"behaviors,":[44],"varying":[47],"number":[48],"of":[49,80,89,97,133,177],"SVs.":[50],"Intermediate":[51],"decision":[52],"variables":[53],"are":[54,125,140],"introduced":[55],"to":[56,62,158],"enable":[57],"high-level":[59],"RL":[60,99,135],"policy":[61,100],"provide":[63],"an":[64],"reference,":[66],"guiding":[68],"low-level":[69],"model":[70],"predictive":[71],"control":[72],"(MPC)":[73],"further":[75],"enhancing":[76],"generalization":[78,175],"ability":[79],"proposed":[82,114,179],"framework.":[83],"By":[84],"leveraging":[85],"structured":[87],"nature":[88],"self-driving":[90],"intersections,":[93],"training":[95,123,136],"problem":[96],"is":[101,110,119],"modeled":[102],"as":[103],"curriculum":[106],"task,":[108],"addressed":[111],"by":[112],"Exp3.S-based":[115],"BiMAB":[116],"algorithm.":[117],"It":[118],"noteworthy":[120],"that":[121,151],"curricula":[124],"dynamically":[126],"adjusted,":[127],"thereby":[128],"facilitating":[129],"sample":[131],"efficiency":[132],"process.":[137],"Comparative":[138],"experiments":[139],"conducted":[141],"in":[142,165],"high-fidelity":[144],"CARLA":[145],"simulator,":[146],"results":[149,164],"indicate":[150],"our":[152],"approach":[153],"achieves":[154],"superior":[155],"performance":[156,176],"compared":[157],"all":[159],"baseline":[160],"methods.":[161],"Furthermore,":[162],"experimental":[163],"two":[166],"new":[167],"urban":[168],"driving":[169],"scenarios":[170],"clearly":[171],"demonstrate":[172],"commendable":[174],"method.":[180]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
