{"id":"https://openalex.org/W4312382698","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892538","title":"Urban Traffic Signal Control with Reinforcement Learning from Demonstration Data","display_name":"Urban Traffic Signal Control with Reinforcement Learning from Demonstration Data","publication_year":2022,"publication_date":"2022-07-18","ids":{"openalex":"https://openalex.org/W4312382698","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892538"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn55064.2022.9892538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892538","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://figshare.com/articles/conference_contribution/Urban_Traffic_Signal_Control_with_Reinforcement_Learning_from_Demonstration_Data/22129862","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100711052","display_name":"Min Wang","orcid":"https://orcid.org/0000-0001-7025-7651"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]},{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Min Wang","raw_affiliation_strings":["School of Computer Science, Wuhan University,Wuhan,China","School of Computer Science, Wuhan University, Wuhan, China","State Key Laboratory of Integrated Services Networks, Xidian University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Wuhan University,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"School of Computer Science, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"State Key Laboratory of Integrated Services Networks, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108047844","display_name":"Libing Wu","orcid":"https://orcid.org/0000-0001-9897-1953"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Libing Wu","raw_affiliation_strings":["School of Computer Science, Wuhan University,Wuhan,China","School of Computer Science, Wuhan University, Wuhan, China","School of Cyber Science and Engineering, Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Wuhan University,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"School of Computer Science, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"School of Cyber Science and Engineering, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100380474","display_name":"Jianxin Li","orcid":"https://orcid.org/0000-0002-9059-330X"},"institutions":[{"id":"https://openalex.org/I149704539","display_name":"Deakin University","ror":"https://ror.org/02czsnj07","country_code":"AU","type":"education","lineage":["https://openalex.org/I149704539"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jianxin Li","raw_affiliation_strings":["School of Information Technology, Deakin University,Burwood,VIC,Australia","School of Information Technology, Deakin University, Burwood, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"School of Information Technology, Deakin University,Burwood,VIC,Australia","institution_ids":["https://openalex.org/I149704539"]},{"raw_affiliation_string":"School of Information Technology, Deakin University, Burwood, VIC, Australia","institution_ids":["https://openalex.org/I149704539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065647746","display_name":"Dan Wu","orcid":"https://orcid.org/0000-0002-2722-8676"},"institutions":[{"id":"https://openalex.org/I74413500","display_name":"University of Windsor","ror":"https://ror.org/01gw3d370","country_code":"CA","type":"education","lineage":["https://openalex.org/I74413500"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Dan Wu","raw_affiliation_strings":["School of Computer Science, University of Windsor,Windsor,Canada,N9B 3P4"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Windsor,Windsor,Canada,N9B 3P4","institution_ids":["https://openalex.org/I74413500"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023893507","display_name":"Chao Ma","orcid":"https://orcid.org/0000-0002-7443-6267"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Ma","raw_affiliation_strings":["School of Cyber Science and Engineering, Wuhan University,Wuhan,China","School of Cyber Science and Engineering, Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Cyber Science and Engineering, Wuhan University,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"School of Cyber Science and Engineering, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100711052"],"corresponding_institution_ids":["https://openalex.org/I149594827","https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":2.1179,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.89342039,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11344","display_name":"Traffic Prediction and Management Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.882311224937439},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8150403499603271},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.7512258291244507},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5477461814880371},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5398674607276917},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.5367957353591919},{"id":"https://openalex.org/keywords/pace","display_name":"Pace","score":0.4867420196533203},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4629088342189789},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4493357837200165}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.882311224937439},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8150403499603271},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.7512258291244507},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5477461814880371},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5398674607276917},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.5367957353591919},{"id":"https://openalex.org/C2777526511","wikidata":"https://www.wikidata.org/wiki/Q691543","display_name":"Pace","level":2,"score":0.4867420196533203},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4629088342189789},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4493357837200165},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ijcnn55064.2022.9892538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892538","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:figshare.com:article/22129862","is_oa":true,"landing_page_url":"https://figshare.com/articles/conference_contribution/Urban_Traffic_Signal_Control_with_Reinforcement_Learning_from_Demonstration_Data/22129862","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:figshare.com:article/22129862","is_oa":true,"landing_page_url":"https://figshare.com/articles/conference_contribution/Urban_Traffic_Signal_Control_with_Reinforcement_Learning_from_Demonstration_Data/22129862","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11","score":0.6100000143051147}],"awards":[{"id":"https://openalex.org/G2562408452","display_name":null,"funder_award_id":"61772377,61672257,91746206,U20A20177","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W106792269","https://openalex.org/W1516835682","https://openalex.org/W2009672082","https://openalex.org/W2137375617","https://openalex.org/W2145339207","https://openalex.org/W2201581102","https://openalex.org/W2397581010","https://openalex.org/W2415726935","https://openalex.org/W2481567506","https://openalex.org/W2741122588","https://openalex.org/W2766381093","https://openalex.org/W2786928559","https://openalex.org/W2788862220","https://openalex.org/W2805560727","https://openalex.org/W2809148419","https://openalex.org/W2915117209","https://openalex.org/W2939228328","https://openalex.org/W2945991855","https://openalex.org/W2963027910","https://openalex.org/W2963099939","https://openalex.org/W2963277051","https://openalex.org/W2963363446","https://openalex.org/W2964043796","https://openalex.org/W2983178256","https://openalex.org/W2985159211","https://openalex.org/W2988973041","https://openalex.org/W2998187693","https://openalex.org/W4214717370","https://openalex.org/W6638444622","https://openalex.org/W6680322746","https://openalex.org/W6687681856","https://openalex.org/W6692846177","https://openalex.org/W6712392314","https://openalex.org/W6716193690","https://openalex.org/W6718092244","https://openalex.org/W6721743441","https://openalex.org/W6742461812","https://openalex.org/W6748314335","https://openalex.org/W6748554570","https://openalex.org/W6748645729","https://openalex.org/W6751714826","https://openalex.org/W6761871375"],"related_works":["https://openalex.org/W3204184292","https://openalex.org/W3176564347","https://openalex.org/W2386723501","https://openalex.org/W2355833770","https://openalex.org/W1985458517","https://openalex.org/W3031039437","https://openalex.org/W183202219","https://openalex.org/W3095877357","https://openalex.org/W2072565696","https://openalex.org/W2050451745"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,24],"has":[2,10],"been":[3],"applied":[4],"to":[5,21,30,52,125,154,163],"various":[6],"decision-making":[7,116],"tasks":[8],"and":[9,16,43,112,152,170,191],"achieved":[11],"high":[12],"profile":[13],"successes.":[14],"More":[15],"more":[17],"studies":[18],"have":[19],"proposed":[20],"use":[22,120],"reinforcement":[23],"(RL)":[25],"for":[26,73,134],"traffic":[27,74,99,135],"signal":[28,75,136],"control":[29,76],"improve":[31],"transportation":[32],"efficiency.":[33],"However,":[34],"these":[35],"methods":[36,199],"suffer":[37],"from":[38,94,143],"a":[39,140,148,156],"major":[40],"exploration":[41],"problem,":[42,67],"their":[44],"performance":[45],"is":[46,161],"particularly":[47],"poor.":[48],"And":[49],"even":[50],"fail":[51],"quickly":[53,169],"converge":[54,168],"during":[55],"the":[56,62,95,114,127,165,172,196],"initial":[57],"stage":[58],"when":[59],"interacting":[60],"with":[61,188],"environment.":[63],"To":[64],"overcome":[65],"this":[66],"we":[68,119,145],"propose":[69],"an":[70,201],"RL":[71,87,141,166],"model":[72,88,142],"based":[77],"on":[78,178],"demonstration":[79,122],"data,":[80],"which":[81],"provides":[82],"prior":[83],"expert":[84,107],"knowledge":[85,108],"before":[86],"training.":[89],"The":[90],"demonstrations":[91],"are":[92],"collected":[93],"classic":[96],"method":[97,185],"self-organizing":[98],"light":[100],"(SOTL).":[101],"It":[102],"not":[103],"only":[104],"serves":[105],"as":[106],"but":[109],"also":[110],"explores":[111],"improves":[113],"entire":[115],"system.":[117],"Specifically,":[118],"small":[121],"data":[123,151],"sets":[124],"pre-train":[126],"Ape-X":[128],"Deep":[129],"Q-learning":[130],"Network":[131],"(DQ":[132],"N)":[133],"control.":[137],"When":[138],"training":[139],"scratch,":[144],"often":[146],"need":[147],"lot":[149],"of":[150,174,203],"time":[153,194],"learn":[155],"better":[157,187],"initialization.":[158],"Our":[159],"approach":[160],"dedicated":[162],"making":[164],"algorithm":[167],"accelerating":[171],"pace":[173],"learning.":[175],"Extensive":[176],"experiments":[177],"three":[179],"urban":[180],"datasets":[181],"confirm":[182],"that":[183],"our":[184],"performs":[186],"faster":[189],"convergence":[190],"least":[192],"travel":[193],"than":[195],"current":[197],"RL-based":[198],"by":[200],"average":[202],"23.9%,":[204],"23.8%,":[205],"11.6%":[206]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
