{"id":"https://openalex.org/W4308080451","doi":"https://doi.org/10.1109/itsc55140.2022.9922100","title":"Offline Reinforcement Learning for Autonomous Driving with Real World Driving Data","display_name":"Offline Reinforcement Learning for Autonomous Driving with Real World Driving Data","publication_year":2022,"publication_date":"2022-10-08","ids":{"openalex":"https://openalex.org/W4308080451","doi":"https://doi.org/10.1109/itsc55140.2022.9922100"},"language":"en","primary_location":{"id":"doi:10.1109/itsc55140.2022.9922100","is_oa":false,"landing_page_url":"https://doi.org/10.1109/itsc55140.2022.9922100","pdf_url":null,"source":{"id":"https://openalex.org/S4363607737","display_name":"2022 IEEE 25th International Conference on Intelligent Transportation Systems (ITSC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 25th International Conference on Intelligent Transportation Systems (ITSC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102871890","display_name":"Xing Fang","orcid":"https://orcid.org/0000-0002-1801-6831"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xing Fang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences,The State Key Laboratory of Management and Control for Complex Systems,Beijing,China,100190","School of Mathematical Sciences, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences,The State Key Laboratory of Management and Control for Complex Systems,Beijing,China,100190","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Mathematical Sciences, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049454999","display_name":"Qichao Zhang","orcid":"https://orcid.org/0000-0001-9747-391X"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qichao Zhang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences,The State Key Laboratory of Management and Control for Complex Systems,Beijing,China,100190","Peng Cheng Laboratory, Shenzhen, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences,The State Key Laboratory of Management and Control for Complex Systems,Beijing,China,100190","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074076701","display_name":"Yinfeng Gao","orcid":"https://orcid.org/0000-0002-3513-1380"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinfeng Gao","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences,The State Key Laboratory of Management and Control for Complex Systems,Beijing,China,100190","School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences,The State Key Laboratory of Management and Control for Complex Systems,Beijing,China,100190","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100624298","display_name":"Dongbin Zhao","orcid":"https://orcid.org/0000-0001-8218-9633"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongbin Zhao","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences,The State Key Laboratory of Management and Control for Complex Systems,Beijing,China,100190","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","Peng Cheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences,The State Key Laboratory of Management and Control for Complex Systems,Beijing,China,100190","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102871890"],"corresponding_institution_ids":["https://openalex.org/I150229711","https://openalex.org/I19820366","https://openalex.org/I4210094879"],"apc_list":null,"apc_paid":null,"fwci":4.9765,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.97870968,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3417","last_page":"3422"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9796000123023987,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9796000123023987,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9778000116348267,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12617","display_name":"Energy, Environment, and Transportation Policies","score":0.9466000199317932,"subfield":{"id":"https://openalex.org/subfields/2105","display_name":"Renewable Energy, Sustainability and the Environment"},"field":{"id":"https://openalex.org/fields/21","display_name":"Energy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7813787460327148},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7440847158432007},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7272634506225586},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6119124889373779},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.566176176071167},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5065399408340454},{"id":"https://openalex.org/keywords/online-and-offline","display_name":"Online and offline","score":0.4657880365848541},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.448237806558609}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7813787460327148},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7440847158432007},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7272634506225586},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6119124889373779},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.566176176071167},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5065399408340454},{"id":"https://openalex.org/C2780102126","wikidata":"https://www.wikidata.org/wiki/Q10928179","display_name":"Online and offline","level":2,"score":0.4657880365848541},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.448237806558609},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/itsc55140.2022.9922100","is_oa":false,"landing_page_url":"https://doi.org/10.1109/itsc55140.2022.9922100","pdf_url":null,"source":{"id":"https://openalex.org/S4363607737","display_name":"2022 IEEE 25th International Conference on Intelligent Transportation Systems (ITSC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 25th International Conference on Intelligent Transportation Systems (ITSC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6685660108","display_name":null,"funder_award_id":"62173325","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7668053959","display_name":null,"funder_award_id":"L191002","funder_id":"https://openalex.org/F4320322919","funder_display_name":"Natural Science Foundation of Beijing Municipality"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322919","display_name":"Natural Science Foundation of Beijing Municipality","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4296474751","https://openalex.org/W3074294383","https://openalex.org/W4225619808","https://openalex.org/W4319083788","https://openalex.org/W3153007185","https://openalex.org/W2983785000","https://openalex.org/W3212439828","https://openalex.org/W3128328080","https://openalex.org/W3016525976","https://openalex.org/W4303494752"],"abstract_inverted_index":{"Since":[0],"traditional":[1],"reinforcement":[2],"learning":[3,41],"(RL)":[4],"approaches":[5],"need":[6],"active":[7],"online":[8],"interaction":[9],"with":[10,81],"the":[11,19,24,119,140,154,160,178,212],"environment,":[12,27],"previous":[13],"works":[14],"are":[15,208],"mainly":[16],"investigated":[17],"in":[18,218],"simulation":[20],"environment":[21],"rather":[22],"than":[23,103],"real":[25,82,161,196],"world":[26,83,162,197],"especially":[28],"for":[29,59,115,153,215],"safety-critical":[30],"applications.":[31],"Offline":[32],"RL":[33,55,80,94,113,143,151,217],"has":[34],"recently":[35],"emerged":[36],"as":[37,62],"a":[38,45,100,123],"promising":[39],"data-driven":[40],"paradigm":[42],"to":[43,66,77,98,172,210],"learn":[44,99],"policy":[46,102],"from":[47],"offline":[48,54,68,79,93,112,142,150,186,216],"dataset":[49,85,152,164],"directly.":[50],"It":[51],"seems":[52],"that":[53],"is":[56,64,170],"well":[57,97],"suited":[58],"autonomous":[60,116,219],"driving,":[61],"it":[63,73],"feasible":[65],"collect":[67],"naturalized":[69],"driving":[70,84,101,117,163,179,198],"dataset.":[71],"However,":[72],"remains":[74],"unclear":[75],"how":[76],"deploy":[78,183],"only":[86],"including":[87,118,195],"observation":[88],"data,":[89],"and":[90,122,138,176,188,200,206],"whether":[91],"current":[92],"algorithms":[95,187],"work":[96],"imitation":[104],"learning?":[105],"In":[106],"this":[107],"paper,":[108],"we":[109,136,147,182],"provide":[110],"an":[111,149],"benchmark":[114],"dataset,":[120],"baselines,":[121],"data":[124,167,175,199],"driven":[125,168],"simulator":[126,169],"<sup":[127,130],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[128,131],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[129,132],"Code:":[133],"https://github.com/weiaiF/offiineRL-INTERACTION.":[134],"First,":[135],"summarize":[137],"introduce":[139],"popular":[141,185],"baseline":[144],"methods.":[145],"Then,":[146],"construct":[148],"car":[155],"following":[156],"task":[157],"based":[158],"on":[159],"INTERACTION.":[165],"A":[166],"applied":[171],"obtain":[173],"augmented":[174,201],"test":[177],"policy.":[180],"Further,":[181],"four":[184],"analyze":[189,211],"their":[190],"performances":[191],"under":[192],"different":[193],"datasets":[194],"data.":[202],"Finally,":[203],"related":[204],"conclusions":[205],"discussions":[207],"given":[209],"critical":[213],"challenge":[214],"driving.":[220]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2025-10-10T00:00:00"}
