{"id":"https://openalex.org/W4404317266","doi":"https://doi.org/10.1109/tai.2024.3497919","title":"Safe Multiagent Reinforcement Learning With Bilevel Optimization in Autonomous Driving","display_name":"Safe Multiagent Reinforcement Learning With Bilevel Optimization in Autonomous Driving","publication_year":2024,"publication_date":"2024-11-13","ids":{"openalex":"https://openalex.org/W4404317266","doi":"https://doi.org/10.1109/tai.2024.3497919"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2024.3497919","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2024.3497919","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101492948","display_name":"Zhi Zheng","orcid":"https://orcid.org/0000-0002-5999-6629"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Zhi Zheng","raw_affiliation_strings":["Department of Informatics, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062677595","display_name":"Shangding Gu","orcid":"https://orcid.org/0000-0002-2722-3779"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Shangding Gu","raw_affiliation_strings":["Department of Informatics, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101492948"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":2.0926,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.87754652,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"6","issue":"4","first_page":"829","last_page":"842"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9564999938011169,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9564999938011169,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9444000124931335,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12617","display_name":"Energy, Environment, and Transportation Policies","score":0.9023000001907349,"subfield":{"id":"https://openalex.org/subfields/2105","display_name":"Renewable Energy, Sustainability and the Environment"},"field":{"id":"https://openalex.org/fields/21","display_name":"Energy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bilevel-optimization","display_name":"Bilevel optimization","score":0.8230482935905457},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8074729442596436},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.580902636051178},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.445472776889801},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.293113112449646},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2895103693008423},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.1270289421081543},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.09654510021209717}],"concepts":[{"id":"https://openalex.org/C3309286","wikidata":"https://www.wikidata.org/wiki/Q4907693","display_name":"Bilevel optimization","level":3,"score":0.8230482935905457},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8074729442596436},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.580902636051178},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.445472776889801},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.293113112449646},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2895103693008423},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.1270289421081543},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.09654510021209717},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2024.3497919","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2024.3497919","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1845972764","https://openalex.org/W1965455100","https://openalex.org/W2087065496","https://openalex.org/W2089415692","https://openalex.org/W2117179049","https://openalex.org/W2137514195","https://openalex.org/W2148059924","https://openalex.org/W2164902667","https://openalex.org/W2616635592","https://openalex.org/W2726500840","https://openalex.org/W2979998532","https://openalex.org/W2991046523","https://openalex.org/W3109732654","https://openalex.org/W3173294282","https://openalex.org/W3201315469","https://openalex.org/W3203930949","https://openalex.org/W4205501012","https://openalex.org/W4206497039","https://openalex.org/W4285603252","https://openalex.org/W4292313830","https://openalex.org/W4312315130","https://openalex.org/W4327571609","https://openalex.org/W4381733136","https://openalex.org/W4383097628","https://openalex.org/W4388616010","https://openalex.org/W4402402057","https://openalex.org/W6638018090","https://openalex.org/W6676416636","https://openalex.org/W6678168664","https://openalex.org/W6737893269","https://openalex.org/W6738796088","https://openalex.org/W6740371788","https://openalex.org/W6741067646","https://openalex.org/W6751725685","https://openalex.org/W6752380930","https://openalex.org/W6766805167","https://openalex.org/W6767785798","https://openalex.org/W6780559895","https://openalex.org/W6789259823","https://openalex.org/W6791684085","https://openalex.org/W6800682094","https://openalex.org/W6802002411","https://openalex.org/W6840380725","https://openalex.org/W6852489929"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2479207418","https://openalex.org/W4237041411","https://openalex.org/W1588628884","https://openalex.org/W1994745260","https://openalex.org/W2382404424","https://openalex.org/W4310083477","https://openalex.org/W2328553770"],"abstract_inverted_index":{"Ensuring":[0],"safety":[1,36,41,180],"in":[2,11,60,74,116],"multiagent":[3,105],"reinforcement":[4],"learning":[5],"(MARL),":[6],"particularly":[7,59],"when":[8],"deploying":[9],"it":[10],"real-world":[12],"applications":[13,121],"such":[14,122,149,171],"as":[15,19,123,150,172],"autonomous":[16,119,138,146],"driving,":[17],"emerges":[18],"a":[20,69,75,135],"critical":[21],"challenge.":[22],"To":[23,126],"address":[24],"this":[25,65],"challenge,":[26],"traditional":[27],"safe":[28,46,70,136],"MARL":[29,32,47,71,114,137,169],"methods":[30],"extend":[31],"approaches":[33],"to":[34,39,51,112],"incorporate":[35],"considerations,":[37],"aiming":[38],"minimize":[40],"risk":[42],"values.":[43],"However,":[44],"these":[45],"algorithms":[48],"often":[49],"fail":[50],"model":[52,77],"other":[53],"agents":[54],"and":[55,102,141,154,164,175,179],"lack":[56],"convergence":[57,83],"guarantees,":[58],"dynamically":[61],"complex":[62],"environments.":[63],"In":[64],"study,":[66],"we":[67,92,133],"propose":[68],"method":[72],"grounded":[73],"Stackelberg":[76,99,104],"with":[78],"bilevel":[79],"optimization,":[80],"for":[81],"which":[82],"analysis":[84],"is":[85],"provided.":[86],"Derived":[87],"from":[88],"our":[89,131,161],"theoretical":[90],"analysis,":[91],"develop":[93],"two":[94],"practical":[95],"algorithms,":[96,132,162],"namely":[97],"constrained":[98,103],"Q-learning":[100],"(CSQ)":[101],"deep":[106],"deterministic":[107],"policy":[108],"gradient":[109],"(CS-MADDPG),":[110],"designed":[111],"facilitate":[113],"decision-making":[115],"some":[117],"simulated":[118],"driving":[120,139,147],"traffic":[124],"management.":[125],"evaluate":[127],"the":[128],"effectiveness":[129],"of":[130],"developed":[134],"benchmark":[140],"conducted":[142],"experiments":[143],"on":[144],"challenging":[145],"scenarios,":[148],"merges,":[151],"roundabouts,":[152],"intersections,":[153],"racetracks.":[155],"The":[156],"experimental":[157],"results":[158],"indicate":[159],"that":[160],"CSQ":[163],"CS-MADDPG,":[165],"outperform":[166],"several":[167],"strong":[168],"baselines,":[170],"Bi-AC,":[173],"MACPO,":[174],"MAPPO-L,":[176],"regarding":[177],"reward":[178],"performance.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":3}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}