{"id":"https://openalex.org/W4225836256","doi":"https://doi.org/10.1109/tiv.2022.3167616","title":"Deep Reinforcement Learning With NMPC Assistance Nash Switching for Urban Autonomous Driving","display_name":"Deep Reinforcement Learning With NMPC Assistance Nash Switching for Urban Autonomous Driving","publication_year":2022,"publication_date":"2022-04-14","ids":{"openalex":"https://openalex.org/W4225836256","doi":"https://doi.org/10.1109/tiv.2022.3167616"},"language":"en","primary_location":{"id":"doi:10.1109/tiv.2022.3167616","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tiv.2022.3167616","pdf_url":null,"source":{"id":"https://openalex.org/S4210199657","display_name":"IEEE Transactions on Intelligent Vehicles","issn_l":"2379-8858","issn":["2379-8858","2379-8904"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Vehicles","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043090172","display_name":"Sina Alighanbari","orcid":null},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Sina Alighanbari","raw_affiliation_strings":["SHEVS lab at the Department of Systems Design Engineering, University of Waterloo, Waterloo, ON, Canada"],"raw_orcid":"https://orcid.org/0000-0002-5409-7149","affiliations":[{"raw_affiliation_string":"SHEVS lab at the Department of Systems Design Engineering, University of Waterloo, Waterloo, ON, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033006881","display_name":"Nasser L. Azad","orcid":null},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Nasser L. Azad","raw_affiliation_strings":["Faculty of Engineering, Department of Systems Design Engineering, University of Waterloo, Waterloo, ON, Canada"],"raw_orcid":"https://orcid.org/0000-0003-1412-7961","affiliations":[{"raw_affiliation_string":"Faculty of Engineering, Department of Systems Design Engineering, University of Waterloo, Waterloo, ON, Canada","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5043090172"],"corresponding_institution_ids":["https://openalex.org/I151746483"],"apc_list":null,"apc_paid":null,"fwci":3.0521,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.92322423,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"8","issue":"3","first_page":"2604","last_page":"2615"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7039562463760376},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5421202778816223},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4259682893753052},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29779052734375},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2610096335411072},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.18013441562652588}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7039562463760376},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5421202778816223},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4259682893753052},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29779052734375},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2610096335411072},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.18013441562652588}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tiv.2022.3167616","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tiv.2022.3167616","pdf_url":null,"source":{"id":"https://openalex.org/S4210199657","display_name":"IEEE Transactions on Intelligent Vehicles","issn_l":"2379-8858","issn":["2379-8858","2379-8904"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Vehicles","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8299999833106995,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322622","display_name":"Toyota Motor Corporation","ror":"https://ror.org/02zqm6r10"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1515851193","https://openalex.org/W1658008008","https://openalex.org/W1923344279","https://openalex.org/W2097381042","https://openalex.org/W2120846115","https://openalex.org/W2124136621","https://openalex.org/W2145339207","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2343568200","https://openalex.org/W2462906003","https://openalex.org/W2604763608","https://openalex.org/W2750632489","https://openalex.org/W2754517384","https://openalex.org/W2788862220","https://openalex.org/W2795561664","https://openalex.org/W2822752092","https://openalex.org/W2837605352","https://openalex.org/W2893097039","https://openalex.org/W2897475915","https://openalex.org/W2905111361","https://openalex.org/W2912445127","https://openalex.org/W2930426397","https://openalex.org/W2947981406","https://openalex.org/W2963019567","https://openalex.org/W2963363446","https://openalex.org/W2963411833","https://openalex.org/W2963525569","https://openalex.org/W2963625099","https://openalex.org/W2963679616","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2967419209","https://openalex.org/W2971188105","https://openalex.org/W2989958156","https://openalex.org/W2990494268","https://openalex.org/W2998563746","https://openalex.org/W3034445502","https://openalex.org/W3121156862","https://openalex.org/W3127561923","https://openalex.org/W3129059695","https://openalex.org/W3134211337","https://openalex.org/W3206683817","https://openalex.org/W4233696721","https://openalex.org/W4300564113","https://openalex.org/W4300971732","https://openalex.org/W4319988532","https://openalex.org/W6636881020","https://openalex.org/W6674600207","https://openalex.org/W6678168664","https://openalex.org/W6684921986","https://openalex.org/W6687681856","https://openalex.org/W6692846177","https://openalex.org/W6718836005","https://openalex.org/W6729433768","https://openalex.org/W6736057607","https://openalex.org/W6744563498","https://openalex.org/W6748314335","https://openalex.org/W6748643490","https://openalex.org/W6757787546","https://openalex.org/W6772666310","https://openalex.org/W6788454488","https://openalex.org/W6849896277"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856"],"abstract_inverted_index":{"Deep":[0],"Deterministic":[1],"Policy":[2],"Gradient":[3],"(DDPG)":[4],"is":[5],"a":[6,30],"promising":[7],"reinforcement":[8],"learning":[9],"technique":[10],"with":[11,58,82,146,221,239],"the":[12,39,54,79],"potential":[13],"to":[14,52,65,98,175,224],"resolve":[15],"complicated":[16],"tasks":[17],"and":[18,48,69,86,100,125,148,151,164,171,183,196,202,209,218],"handle":[19],"high-dimensional":[20],"state/action":[21],"spaces.":[22],"However,":[23],"it":[24],"suffers":[25],"from":[26],"sample":[27],"inefficiency,":[28],"requiring":[29],"high":[31],"number":[32],"of":[33,56,71,92,179,185,194,215],"training":[34,55,113],"samples.":[35],"To":[36,108],"speed":[37],"up":[38],"training,":[40],"we":[41,111,234],"propose":[42],"<inline-formula":[43],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[44],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[45],"notation=\"LaTeX\">$\\epsilon$</tex-math></inline-formula>":[46],"-annealing":[47],"Q-learning":[49,80,124,145,162,195,217],"switching":[50,94],"methods":[51,95,122],"aid":[53],"DDPG":[57,99,222,240],"Nonlinear":[59],"Model":[60],"Predictive":[61],"Control":[62],"(NMPC)":[63],"controller":[64],"solve":[66],"priority":[67],"calculation":[68],"merging":[70,243],"autonomous":[72],"vehicles":[73],"at":[74,131],"roundabouts.":[75],"We":[76,118,142,160,191,211],"further":[77],"expand":[78],"switch":[81,104,129,163,167,198,208,216,220,227],"double":[83],"replay":[84],"memory":[85],"Nash":[87,103,166,189,197,207,219,226,236],"Q-value":[88],"updates.":[89,190],"The":[90],"performance":[91,205,238],"these":[93],"are":[96],"compared":[97],"demonstrate":[101,203],"that":[102,153],"outperforms":[105],"other":[106],"methods.":[107],"reduce":[109],"conservativeness,":[110],"test":[112,119],"using":[114],"variable":[115],"traffic":[116],"density.":[117],"three":[120],"selection":[121],"inside":[123],"show":[126,152],"constant":[127],"threshold":[128],"has":[130,155,229],"least":[132],"ten":[133],"times":[134],"higher":[135,157,230,248],"mean":[136,158,181,249],"reward":[137,182],"for":[138,188,241],"50":[139],"episodes":[140],"training.":[141],"also":[143],"compare":[144,161,235],"NMPC":[147,154,201],"PID":[149],"assistance":[150],"114%":[156],"reward.":[159,250],"novel":[165],"method":[168],"under":[169],"noise-free":[170],"noisy":[172],"input":[173],"conditions":[174],"prove":[176,225],"an":[177],"increase":[178],"35%":[180],"decrease":[184],"4%":[186],"std":[187],"analyze":[192],"efficacy":[193],"approaches":[199],"w.r.t":[200],"comparable":[204],"between":[206],"NMPC.":[210],"juxtapose":[212],"driving":[213],"results":[214],"algorithm":[223],"strategy":[228],"overall":[231],"performance.":[232],"Finally,":[233],"switch\u2019s":[237],"highway":[242],"scenario":[244],"which":[245],"shows":[246],"159%":[247]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
