{"id":"https://openalex.org/W3113171483","doi":"https://doi.org/10.1109/icra48506.2021.9560777","title":"Amortized Q-learning with Model-based Action Proposals for Autonomous Driving on Highways","display_name":"Amortized Q-learning with Model-based Action Proposals for Autonomous Driving on Highways","publication_year":2021,"publication_date":"2021-05-30","ids":{"openalex":"https://openalex.org/W3113171483","doi":"https://doi.org/10.1109/icra48506.2021.9560777","mag":"3113171483"},"language":"en","primary_location":{"id":"doi:10.1109/icra48506.2021.9560777","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9560777","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2012.03234","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085511316","display_name":"Branka Mirchevska","orcid":null},"institutions":[{"id":"https://openalex.org/I161046081","display_name":"University of Freiburg","ror":"https://ror.org/0245cg223","country_code":"DE","type":"education","lineage":["https://openalex.org/I161046081"]},{"id":"https://openalex.org/I4210156768","display_name":"BMW Group (Germany)","ror":"https://ror.org/044kkbh92","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210156768"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Branka Mirchevska","raw_affiliation_strings":["BMW Group, Unterschleissheim, Germany","Dept. of Computer Science, University of Freiburg, Germany","Albert-Ludwigs Univ. of Freiburg"],"affiliations":[{"raw_affiliation_string":"BMW Group, Unterschleissheim, Germany","institution_ids":["https://openalex.org/I4210156768"]},{"raw_affiliation_string":"Dept. of Computer Science, University of Freiburg, Germany","institution_ids":["https://openalex.org/I161046081"]},{"raw_affiliation_string":"Albert-Ludwigs Univ. of Freiburg","institution_ids":["https://openalex.org/I161046081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108115233","display_name":"Maria H\u00fcgle","orcid":null},"institutions":[{"id":"https://openalex.org/I161046081","display_name":"University of Freiburg","ror":"https://ror.org/0245cg223","country_code":"DE","type":"education","lineage":["https://openalex.org/I161046081"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Maria Hugle","raw_affiliation_strings":["Dept. of Computer Science, University of Freiburg, Germany","Albert-Ludwigs Univ. of Freiburg"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science, University of Freiburg, Germany","institution_ids":["https://openalex.org/I161046081"]},{"raw_affiliation_string":"Albert-Ludwigs Univ. of Freiburg","institution_ids":["https://openalex.org/I161046081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033350585","display_name":"Gabriel Kalweit","orcid":"https://orcid.org/0000-0003-4581-8810"},"institutions":[{"id":"https://openalex.org/I161046081","display_name":"University of Freiburg","ror":"https://ror.org/0245cg223","country_code":"DE","type":"education","lineage":["https://openalex.org/I161046081"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gabriel Kalweit","raw_affiliation_strings":["Dept. of Computer Science, University of Freiburg, Germany","Albert-Ludwigs Univ. of Freiburg"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science, University of Freiburg, Germany","institution_ids":["https://openalex.org/I161046081"]},{"raw_affiliation_string":"Albert-Ludwigs Univ. of Freiburg","institution_ids":["https://openalex.org/I161046081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027250190","display_name":"Moritz Werling","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156768","display_name":"BMW Group (Germany)","ror":"https://ror.org/044kkbh92","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210156768"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Moritz Werling","raw_affiliation_strings":["BMW Group, Unterschleissheim, Germany","[BMW Group, Unterschleissheim, Germany]"],"affiliations":[{"raw_affiliation_string":"BMW Group, Unterschleissheim, Germany","institution_ids":["https://openalex.org/I4210156768"]},{"raw_affiliation_string":"[BMW Group, Unterschleissheim, Germany]","institution_ids":["https://openalex.org/I4210156768"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038908529","display_name":"Joschka Boedecker","orcid":"https://orcid.org/0000-0002-3486-7345"},"institutions":[{"id":"https://openalex.org/I161046081","display_name":"University of Freiburg","ror":"https://ror.org/0245cg223","country_code":"DE","type":"education","lineage":["https://openalex.org/I161046081"]},{"id":"https://openalex.org/I4210098324","display_name":"Bernstein Center for Computational Neuroscience Freiburg","ror":"https://ror.org/010p50m34","country_code":"DE","type":"government","lineage":["https://openalex.org/I4210092532","https://openalex.org/I4210098324"]},{"id":"https://openalex.org/I4210113520","display_name":"Brain (Germany)","ror":"https://ror.org/01gamcy45","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210113520"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Joschka Boedecker","raw_affiliation_strings":["Cluster of Excellence BrainLinks-BrainTools, Freiburg, Germany","Dept. of Computer Science, University of Freiburg, Germany","University of Freiburg,Department of Computer Science,Germany"],"affiliations":[{"raw_affiliation_string":"Cluster of Excellence BrainLinks-BrainTools, Freiburg, Germany","institution_ids":["https://openalex.org/I4210098324","https://openalex.org/I4210113520"]},{"raw_affiliation_string":"Dept. of Computer Science, University of Freiburg, Germany","institution_ids":["https://openalex.org/I161046081"]},{"raw_affiliation_string":"University of Freiburg,Department of Computer Science,Germany","institution_ids":["https://openalex.org/I161046081"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5085511316"],"corresponding_institution_ids":["https://openalex.org/I161046081","https://openalex.org/I4210156768"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00465802,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1028","last_page":"1035"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7301344871520996},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.7041013240814209},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7019339799880981},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.695499837398529},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.6881885528564453},{"id":"https://openalex.org/keywords/time-horizon","display_name":"Time horizon","score":0.5832158923149109},{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.5753343105316162},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.562156081199646},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.558493971824646},{"id":"https://openalex.org/keywords/trajectory-optimization","display_name":"Trajectory optimization","score":0.5475531220436096},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5188614130020142},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.40102261304855347},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2655661702156067},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15999335050582886}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7301344871520996},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.7041013240814209},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7019339799880981},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.695499837398529},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.6881885528564453},{"id":"https://openalex.org/C28761237","wikidata":"https://www.wikidata.org/wiki/Q7805321","display_name":"Time horizon","level":2,"score":0.5832158923149109},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.5753343105316162},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.562156081199646},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.558493971824646},{"id":"https://openalex.org/C173246807","wikidata":"https://www.wikidata.org/wiki/Q7833062","display_name":"Trajectory optimization","level":3,"score":0.5475531220436096},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5188614130020142},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.40102261304855347},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2655661702156067},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15999335050582886},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icra48506.2021.9560777","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9560777","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2012.03234","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.03234","pdf_url":"https://arxiv.org/pdf/2012.03234","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3113171483","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2012.03234","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2012.03234","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2012.03234","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2012.03234","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.03234","pdf_url":"https://arxiv.org/pdf/2012.03234","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.4099999964237213,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3113171483.pdf","grobid_xml":"https://content.openalex.org/works/W3113171483.grobid-xml"},"referenced_works_count":38,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1757796397","https://openalex.org/W1869778509","https://openalex.org/W1965455100","https://openalex.org/W1968651586","https://openalex.org/W2107338474","https://openalex.org/W2109910161","https://openalex.org/W2112930335","https://openalex.org/W2121806728","https://openalex.org/W2121863487","https://openalex.org/W2257979135","https://openalex.org/W2304209433","https://openalex.org/W2394607849","https://openalex.org/W2404189583","https://openalex.org/W2406067508","https://openalex.org/W2530849036","https://openalex.org/W2729776805","https://openalex.org/W2753798143","https://openalex.org/W2766447205","https://openalex.org/W2783963507","https://openalex.org/W2795064979","https://openalex.org/W2904263972","https://openalex.org/W2909906617","https://openalex.org/W2963322416","https://openalex.org/W2964007901","https://openalex.org/W2971510799","https://openalex.org/W2975429944","https://openalex.org/W2990417263","https://openalex.org/W2991030916","https://openalex.org/W3002855444","https://openalex.org/W3012722533","https://openalex.org/W3047548613","https://openalex.org/W3091528360","https://openalex.org/W3102777717","https://openalex.org/W3102797050","https://openalex.org/W6639086533","https://openalex.org/W6744580074","https://openalex.org/W6775492281"],"related_works":["https://openalex.org/W3205019221","https://openalex.org/W2049195806","https://openalex.org/W3004162282","https://openalex.org/W3091166409","https://openalex.org/W3045608587","https://openalex.org/W3003747652","https://openalex.org/W3207043982","https://openalex.org/W2958744321","https://openalex.org/W3044863446","https://openalex.org/W3088333196","https://openalex.org/W2970065535","https://openalex.org/W3096342673","https://openalex.org/W3161433433","https://openalex.org/W2773507491","https://openalex.org/W3136318222","https://openalex.org/W3126410360","https://openalex.org/W2077245982","https://openalex.org/W3024120247","https://openalex.org/W3139226422","https://openalex.org/W3185132436"],"abstract_inverted_index":{"Well-established":[0],"optimization-based":[1],"methods":[2],"can":[3],"guarantee":[4],"an":[5,71,98,178],"optimal":[6,25,72,99,111],"trajectory":[7,26,82,95],"for":[8,27,48,103],"a":[9,17,21,35,56,86,94,129,166],"short":[10,29],"optimization":[11],"horizon,":[12],"typically":[13],"no":[14],"longer":[15],"than":[16,157],"few":[18],"seconds.":[19],"As":[20],"result,":[22],"choosing":[23],"the":[24,40,43,65,78,118,125,145,158],"this":[28,61],"horizon":[30],"may":[31],"still":[32],"result":[33],"in":[34,55,144],"sub-optimal":[36],"long-term":[37,73,100],"solution.":[38],"At":[39],"same":[41],"time,":[42],"resulting":[44],"short-term":[45],"trajectories":[46],"allow":[47],"effective,":[49],"comfortable":[50],"and":[51,124,150,177],"provable":[52],"safe":[53],"maneuvers":[54,112],"dynamic":[57],"traffic":[58,147],"environment.":[59],"In":[60],"work,":[62],"we":[63,115,162],"address":[64],"question":[66],"of":[67,80,128,132],"how":[68],"to":[69,153],"ensure":[70],"driving":[74,104],"strategy,":[75],"while":[76],"keeping":[77],"benefits":[79],"classical":[81],"planning.":[83],"We":[84,137],"introduce":[85],"Reinforcement":[87],"Learning":[88],"based":[89],"approach":[90],"that":[91],"coupled":[92],"with":[93],"planner,":[96],"learns":[97],"decision-making":[101],"strategy":[102],"on":[105,141],"highways.":[106],"By":[107],"online":[108],"generating":[109],"locally":[110],"as":[113],"actions,":[114],"balance":[116],"between":[117],"infinite":[119],"low-level":[120],"continuous":[121],"action":[122,168],"space,":[123],"limited":[126],"flexibility":[127],"fixed":[130],"number":[131],"predefined":[133],"standard":[134],"lane-change":[135],"actions.":[136],"evaluated":[138],"our":[139],"method":[140],"realistic":[142],"scenarios":[143],"open-source":[146],"simulator":[148],"SUMO":[149],"were":[151],"able":[152],"achieve":[154],"better":[155],"performance":[156],"4":[159],"benchmark":[160],"approaches":[161],"compared":[163],"against,":[164],"including":[165],"random":[167],"selecting":[169],"agent,":[170,172],"greedy":[171],"high-level,":[173],"discrete":[174],"actions":[175],"agent":[176],"IDM-based":[179],"SUMO-controlled":[180],"agent.":[181]},"counts_by_year":[],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
