{"id":"https://openalex.org/W7128691251","doi":"https://doi.org/10.1109/tits.2026.3656268","title":"Q-Advantage Integrated Human-Guided Reinforcement Learning for Safe End-to-End Autonomous Driving","display_name":"Q-Advantage Integrated Human-Guided Reinforcement Learning for Safe End-to-End Autonomous Driving","publication_year":2026,"publication_date":"2026-02-12","ids":{"openalex":"https://openalex.org/W7128691251","doi":"https://doi.org/10.1109/tits.2026.3656268"},"language":null,"primary_location":{"id":"doi:10.1109/tits.2026.3656268","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tits.2026.3656268","pdf_url":null,"source":{"id":"https://openalex.org/S144771191","display_name":"IEEE Transactions on Intelligent Transportation Systems","issn_l":"1524-9050","issn":["1524-9050","1558-0016"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Transportation Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yong Wang","orcid":"https://orcid.org/0000-0001-7607-1188"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Yong Wang","raw_affiliation_strings":["Department of Data and Systems Engineering, University of Hong Kong, Hong Kong, China"],"raw_orcid":"https://orcid.org/0000-0001-7607-1188","affiliations":[{"raw_affiliation_string":"Department of Data and Systems Engineering, University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080837781","display_name":"P. Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131005","display_name":"Chery Automobile (China)","ror":"https://ror.org/02xab7z06","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210131005"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pei Wang","raw_affiliation_strings":["Seres Automobile, Chongqing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seres Automobile, Chongqing, China","institution_ids":["https://openalex.org/I4210131005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124111302","display_name":"Hongwen He","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongwen He","raw_affiliation_strings":["School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-2874-1858","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053317485","display_name":"Jingda Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingda Wu","raw_affiliation_strings":["School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7336-4492","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015473075","display_name":"Yingjuan Tang","orcid":"https://orcid.org/0000-0002-4838-7211"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingjuan Tang","raw_affiliation_strings":["School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-4838-7211","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5125938629","display_name":"Zirui Kuang","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zirui Kuang","raw_affiliation_strings":["School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-6871-0205","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I889458895"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28507432,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"27","issue":"3","first_page":"2957","last_page":"2969"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.4675999879837036,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.4675999879837036,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.28369998931884766,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.037700001150369644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9156000018119812},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7527999877929688},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.6338000297546387},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5054000020027161},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.42750000953674316},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.37560001015663147},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.352400004863739}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9156000018119812},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7527999877929688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6969000101089478},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.6338000297546387},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6140999794006348},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5054000020027161},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4855000078678131},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.42750000953674316},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.37560001015663147},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.352400004863739},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.33559998869895935},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.3328000009059906},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.3222000002861023},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.25769999623298645}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tits.2026.3656268","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tits.2026.3656268","pdf_url":null,"source":{"id":"https://openalex.org/S144771191","display_name":"IEEE Transactions on Intelligent Transportation Systems","issn_l":"1524-9050","issn":["1524-9050","1558-0016"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Transportation Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G386174299","display_name":null,"funder_award_id":"52502520","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,40,50,105],"(RL)":[2],"is":[3,120],"a":[4,34,130,142,147,153,167],"promising":[5],"approach":[6],"for":[7],"end-to-end":[8],"autonomous":[9,169],"driving,":[10],"but":[11],"its":[12],"practical":[13],"deployment":[14,165],"remains":[15],"challenging":[16],"due":[17],"to":[18,24,68,96,137],"low":[19],"sample":[20,134],"efficiency":[21,135],"and":[22,51,72,86,99,141,159],"sensitivity":[23],"reward":[25],"design.":[26],"To":[27],"address":[28],"these":[29],"challenges,":[30],"this":[31],"study":[32],"presents":[33],"novel":[35],"Q-advantage":[36,61,80],"integrated":[37],"human-guided":[38,149],"reinforcement":[39],"(QIHG-RL)":[41],"framework":[42,56,119],"that":[43,63,77],"effectively":[44],"combines":[45],"the":[46,79,83,87,94,107],"strengths":[47],"of":[48],"machine":[49],"human":[52,101],"expertise.":[53],"The":[54,118],"QIHG-RL":[55],"features:":[57],"1)":[58],"an":[59,74],"ensemble":[60],"function":[62],"aggregates":[64],"multiple":[65],"value":[66,70],"networks":[67],"enhance":[69],"estimation,":[71],"2)":[73],"integration":[75],"mechanism":[76],"embeds":[78],"into":[81],"both":[82],"actor-critic":[84],"network":[85],"prioritized":[88],"experience":[89],"replay.":[90],"This":[91],"design":[92],"allows":[93],"agent":[95],"leverage":[97],"sparse":[98],"sub-optimal":[100],"demonstrations,":[102],"accelerating":[103],"policy":[104],"in":[106,133],"early":[108],"training":[109,116],"phase":[110],"while":[111],"gradually":[112],"enhancing":[113],"exploration":[114],"as":[115],"progresses.":[117],"evaluated":[121],"across":[122],"three":[123],"safety-critical":[124],"driving":[125],"tasks.":[126],"Experimental":[127],"results":[128],"show":[129],"167%":[131],"improvement":[132],"compared":[136],"standard":[138],"RL":[139,150],"methods":[140],"14%":[143],"performance":[144],"gain":[145],"over":[146],"state-of-the-art":[148],"baseline.":[151],"Furthermore,":[152],"Sim2Real":[154],"pipeline":[155],"combining":[156],"domain":[157],"randomization":[158],"semantic":[160],"denoised":[161],"remapping":[162],"facilitates":[163],"successful":[164],"on":[166],"real-world":[168],"vehicle.":[170]},"counts_by_year":[],"updated_date":"2026-03-17T06:59:57.516163","created_date":"2026-02-02T00:00:00"}
