{"id":"https://openalex.org/W7126233022","doi":"https://doi.org/10.1109/access.2026.3659797","title":"Stackelberg Game-Theoretic Safe MARL With Bilevel Control for Autonomous Driving","display_name":"Stackelberg Game-Theoretic Safe MARL With Bilevel Control for Autonomous Driving","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7126233022","doi":"https://doi.org/10.1109/access.2026.3659797"},"language":"en","primary_location":{"id":"doi:10.1109/access.2026.3659797","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3659797","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2026.3659797","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124460251","display_name":"Haoming Qu","orcid":null},"institutions":[{"id":"https://openalex.org/I108904480","display_name":"Shenyang Institute of Engineering","ror":"https://ror.org/02pfsj857","country_code":"CN","type":"education","lineage":["https://openalex.org/I108904480"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoming Qu","raw_affiliation_strings":["School of Automation, Shenyang Institute of Engineering, Shenyang, China"],"raw_orcid":"https://orcid.org/0009-0006-9063-3953","affiliations":[{"raw_affiliation_string":"School of Automation, Shenyang Institute of Engineering, Shenyang, China","institution_ids":["https://openalex.org/I108904480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124399003","display_name":"Xin Li","orcid":null},"institutions":[{"id":"https://openalex.org/I108904480","display_name":"Shenyang Institute of Engineering","ror":"https://ror.org/02pfsj857","country_code":"CN","type":"education","lineage":["https://openalex.org/I108904480"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Li","raw_affiliation_strings":["School of Automation, Shenyang Institute of Engineering, Shenyang, China"],"raw_orcid":"https://orcid.org/0009-0002-2233-1241","affiliations":[{"raw_affiliation_string":"School of Automation, Shenyang Institute of Engineering, Shenyang, China","institution_ids":["https://openalex.org/I108904480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010472540","display_name":"Charles Z. Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Charles Z. Liu","raw_affiliation_strings":["School of Electrical and Computer Engineering, Faculty of Engineering, The University of Sydney, Sydney, NSW, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, Faculty of Engineering, The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124429446","display_name":"Jingyan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jingyan Zhang","raw_affiliation_strings":["School of Electrical and Computer Engineering, Faculty of Engineering, The University of Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0009-0002-5698-6043","affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, Faculty of Engineering, The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124451697","display_name":"Siyuan Zhuang","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Siyuan Zhuang","raw_affiliation_strings":["School of Electrical and Computer Engineering, Faculty of Engineering, The University of Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0009-0001-1488-3743","affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, Faculty of Engineering, The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124338348","display_name":"Ming Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ming Ma","raw_affiliation_strings":["School of Electrical and Computer Engineering, Faculty of Engineering, The University of Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0009-0006-7265-5059","affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, Faculty of Engineering, The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5124460251"],"corresponding_institution_ids":["https://openalex.org/I108904480"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17717324,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":null,"first_page":"17506","last_page":"17524"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.36649999022483826,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.36649999022483826,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.35670000314712524,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.1657000035047531,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stackelberg-competition","display_name":"Stackelberg competition","score":0.8440999984741211},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.613099992275238},{"id":"https://openalex.org/keywords/bilevel-optimization","display_name":"Bilevel optimization","score":0.4864000082015991},{"id":"https://openalex.org/keywords/action-selection","display_name":"Action selection","score":0.3734000027179718},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.36480000615119934},{"id":"https://openalex.org/keywords/operator","display_name":"Operator (biology)","score":0.34049999713897705},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.3312999904155731}],"concepts":[{"id":"https://openalex.org/C199510392","wikidata":"https://www.wikidata.org/wiki/Q1184602","display_name":"Stackelberg competition","level":2,"score":0.8440999984741211},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7878000140190125},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.613099992275238},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.58160001039505},{"id":"https://openalex.org/C3309286","wikidata":"https://www.wikidata.org/wiki/Q4907693","display_name":"Bilevel optimization","level":3,"score":0.4864000082015991},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.3734000027179718},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.36480000615119934},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.34049999713897705},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.3312999904155731},{"id":"https://openalex.org/C22171661","wikidata":"https://www.wikidata.org/wiki/Q1074380","display_name":"Stochastic game","level":2,"score":0.31520000100135803},{"id":"https://openalex.org/C159023740","wikidata":"https://www.wikidata.org/wiki/Q623276","display_name":"Deadlock","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2669000029563904},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.25690001249313354},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.2565000057220459},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.2551000118255615},{"id":"https://openalex.org/C79487989","wikidata":"https://www.wikidata.org/wiki/Q934680","display_name":"Vehicle dynamics","level":2,"score":0.2547999918460846}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2026.3659797","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3659797","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:9d08d3f561b24e03b2b893687d5a50b1","is_oa":true,"landing_page_url":"https://doaj.org/article/9d08d3f561b24e03b2b893687d5a50b1","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 14, Pp 17506-17524 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2026.3659797","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3659797","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W586722081","https://openalex.org/W2105403067","https://openalex.org/W2343568200","https://openalex.org/W2607873356","https://openalex.org/W2761873684","https://openalex.org/W2766447205","https://openalex.org/W2977843878","https://openalex.org/W3034379033","https://openalex.org/W3037429136","https://openalex.org/W3111506700","https://openalex.org/W3140277255","https://openalex.org/W3169245465","https://openalex.org/W3193488849","https://openalex.org/W3210290940","https://openalex.org/W3214452188","https://openalex.org/W4223971808","https://openalex.org/W4283826293","https://openalex.org/W4293576385","https://openalex.org/W4296703356","https://openalex.org/W4309349085","https://openalex.org/W4311114086","https://openalex.org/W4316468867","https://openalex.org/W4322729780","https://openalex.org/W4362720664","https://openalex.org/W4365790382","https://openalex.org/W4366957357","https://openalex.org/W4382203535","https://openalex.org/W4382981485","https://openalex.org/W4385071241","https://openalex.org/W4388286312","https://openalex.org/W4390747816","https://openalex.org/W4400526478","https://openalex.org/W4401218978","https://openalex.org/W4401510411","https://openalex.org/W4404317266","https://openalex.org/W4404580175","https://openalex.org/W4408498267","https://openalex.org/W4411866959","https://openalex.org/W7083631372"],"related_works":[],"abstract_inverted_index":{"Ensuring":[0],"safety":[1,64,77,202],"in":[2],"interactive":[3],"autonomous":[4],"driving":[5,50],"remains":[6],"a":[7,43,53,67,100,109,113,123,133,173,228,247,258],"core":[8],"challenge":[9],"for":[10,269],"reinforcement":[11],"learning":[12,211],"(RL),":[13],"since":[14],"agents":[15],"must":[16],"act":[17],"under":[18],"uncertainty":[19],"and":[20,34,79,122,160,194,209,230,236,257],"rare":[21],"but":[22],"critical":[23],"events":[24],"(e.g.,":[25],"collisions)":[26],"while":[27,204,263],"respecting":[28],"traffic":[29,252],"rules":[30],"such":[31],"as":[32,52],"yielding":[33],"right-of-way.":[35],"To":[36,90],"address":[37],"these":[38],"challenges,":[39],"we":[40,245],"propose":[41],"DiStaK,":[42],"distributional":[44,237],"Stackelberg":[45,97,224,261],"RL":[46],"framework":[47],"that":[48,219,231],"models":[49],"interactions":[51],"bilevel":[54],"leader\u2013follower":[55],"game.":[56],"A":[57],"practical":[58,248],"discrete":[59],"instantiation,":[60],"DiStaK-C51,":[61],"augments":[62],"the":[63,73,168,220],"layer":[65],"with":[66,99,151,172],"C51-based":[68],"cost":[69],"head":[70],"to":[71,176,250],"estimate":[72],"full":[74],"distribution":[75,87],"of":[76],"costs":[78],"constructs":[80],"chance-constrained":[81],"admissible":[82],"action":[83],"sets":[84,182],"via":[85,253],"cumulative":[86],"(CDF)":[88],"thresholding.":[89],"improve":[91],"efficiency,":[92],"DiStaK-C51":[93,187],"replaces":[94],"exhaustive":[95],"joint-action":[96],"enumeration":[98],"retriever\u2013refiner":[101],"Top-<italic":[102,232],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[103,105,139,141,145,147,149,158,191,233],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">K</i>\u2019/Top-<italic":[104],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">K</i>":[106,234],"selection":[107],"rule:":[108],"lightweight":[110],"retriever":[111],"produces":[112],"small":[114],"candidate":[115],"list,":[116],"chance-constraint":[117],"screening":[118],"filters":[119],"unsafe":[120],"actions,":[121],"final":[124],"Top-K":[125],"shortlist":[126],"supports":[127],"critic-based":[128],"refinement.":[129],"The":[130],"follower":[131],"selects":[132],"risk-aware":[134],"best":[135],"response":[136],"using":[137],"<italic":[138,190],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Q</i><sub":[140],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sub>":[142,150],"-":[143],"\u03bb<sub":[144,157],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sub><italic":[146],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">C</i><sub":[148],"an":[152,240],"adaptive":[153],"dual":[154],"update":[155],"on":[156,167,188],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sub>,":[159],"leader":[161],"actions":[162],"can":[163],"be":[164],"screened":[165,222],"based":[166],"induced":[169],"interaction":[170],"outcome,":[171],"relaxation":[174],"fallback":[175],"avoid":[177],"deadlock":[178],"when":[179],"estimated":[180],"safe":[181,223],"are":[183],"empty.":[184],"We":[185,213],"evaluate":[186],"standard":[189],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">two-vehicle</i>":[192],"merge":[193],"roundabout":[195],"benchmarks,":[196],"where":[197],"it":[198],"achieves":[199],"substantially":[200],"improved":[201],"metrics":[203],"maintaining":[205],"strong":[206],"task":[207],"performance":[208],"stable":[210],"dynamics.":[212],"also":[214],"provide":[215],"theoretical":[216],"analysis":[217],"showing":[218],"(fallback-augmented)":[221],"Bellman":[225],"operator":[226],"is":[227,267],"contraction":[229],"shortlisting":[235],"projection":[238],"yield":[239],"explicit":[241],"\u03f5-neighborhood":[242],"bound.":[243],"Finally,":[244],"outline":[246],"extension":[249],"multi-vehicle":[251,265],"rule-based":[254],"role":[255],"assignment":[256],"horizontal":[259],"two-level":[260],"expansion,":[262],"comprehensive":[264],"evaluation":[266],"left":[268],"future":[270],"work.":[271]},"counts_by_year":[],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2026-02-01T00:00:00"}
