{"id":"https://openalex.org/W3211148793","doi":"https://doi.org/10.1109/itsc48978.2021.9564528","title":"Microscopic Model-Based RL Approaches for Traffic Signal Control Generalize Better than Model-Free RL Approaches","display_name":"Microscopic Model-Based RL Approaches for Traffic Signal Control Generalize Better than Model-Free RL Approaches","publication_year":2021,"publication_date":"2021-09-19","ids":{"openalex":"https://openalex.org/W3211148793","doi":"https://doi.org/10.1109/itsc48978.2021.9564528","mag":"3211148793"},"language":"en","primary_location":{"id":"doi:10.1109/itsc48978.2021.9564528","is_oa":false,"landing_page_url":"https://doi.org/10.1109/itsc48978.2021.9564528","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Intelligent Transportation Systems Conference (ITSC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048319650","display_name":"Parth Jaggi","orcid":null},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Parth Jaggi","raw_affiliation_strings":["Department of Mechanical & Industrial Engineering, University of Toronto, Toronto, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical & Industrial Engineering, University of Toronto, Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100357594","display_name":"Xiaoyu Wang","orcid":"https://orcid.org/0000-0002-3819-2015"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Xiaoyu Wang","raw_affiliation_strings":["Department of Civil & Mineral Engineering, University of Toronto, Toronto, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Civil & Mineral Engineering, University of Toronto, Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016796784","display_name":"Nicolas Carrara","orcid":null},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Nicolas Carrara","raw_affiliation_strings":["Department of Mechanical & Industrial Engineering, University of Toronto, Toronto, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical & Industrial Engineering, University of Toronto, Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028174137","display_name":"Scott Sanner","orcid":"https://orcid.org/0000-0001-7984-8394"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Scott Sanner","raw_affiliation_strings":["Department of Mechanical & Industrial Engineering, University of Toronto, Toronto, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical & Industrial Engineering, University of Toronto, Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007210426","display_name":"Baher Abdulhai","orcid":"https://orcid.org/0000-0002-8787-2578"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Baher Abdulhai","raw_affiliation_strings":["Department of Civil & Mineral Engineering, University of Toronto, Toronto, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Civil & Mineral Engineering, University of Toronto, Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5048319650"],"corresponding_institution_ids":["https://openalex.org/I185261750"],"apc_list":null,"apc_paid":null,"fwci":0.6867,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.70033191,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2525","last_page":"2532"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7792044878005981},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.776233434677124},{"id":"https://openalex.org/keywords/intersection","display_name":"Intersection (aeronautics)","score":0.6813600063323975},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5059613585472107},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.48411548137664795},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.46006524562835693},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.45069390535354614},{"id":"https://openalex.org/keywords/network-topology","display_name":"Network topology","score":0.44511306285858154},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.4362228214740753},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42367807030677795},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4197208881378174},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.15785598754882812},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.0975070595741272}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7792044878005981},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.776233434677124},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.6813600063323975},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5059613585472107},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.48411548137664795},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.46006524562835693},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.45069390535354614},{"id":"https://openalex.org/C199845137","wikidata":"https://www.wikidata.org/wiki/Q145490","display_name":"Network topology","level":2,"score":0.44511306285858154},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.4362228214740753},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42367807030677795},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4197208881378174},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.15785598754882812},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0975070595741272},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/itsc48978.2021.9564528","is_oa":false,"landing_page_url":"https://doi.org/10.1109/itsc48978.2021.9564528","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Intelligent Transportation Systems Conference (ITSC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322183","display_name":"Huawei Technologies","ror":"https://ror.org/00cmhce21"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W41554520","https://openalex.org/W626724867","https://openalex.org/W1559240825","https://openalex.org/W1714211023","https://openalex.org/W1757796397","https://openalex.org/W1972841873","https://openalex.org/W1973479020","https://openalex.org/W2073787051","https://openalex.org/W2088595989","https://openalex.org/W2091700708","https://openalex.org/W2107549951","https://openalex.org/W2121863487","https://openalex.org/W2134560943","https://openalex.org/W2135997697","https://openalex.org/W2155968351","https://openalex.org/W2168405694","https://openalex.org/W2257979135","https://openalex.org/W2480177474","https://openalex.org/W2604712483","https://openalex.org/W2613020517","https://openalex.org/W2746553466","https://openalex.org/W2754879180","https://openalex.org/W2766447205","https://openalex.org/W2779040504","https://openalex.org/W2786928559","https://openalex.org/W2903709398","https://openalex.org/W2915117209","https://openalex.org/W2933570795","https://openalex.org/W2963403143","https://openalex.org/W2964335069","https://openalex.org/W2964749398","https://openalex.org/W2991060903","https://openalex.org/W3093139241","https://openalex.org/W3114441221","https://openalex.org/W3137695714","https://openalex.org/W4298857966","https://openalex.org/W6619883814","https://openalex.org/W6637441126","https://openalex.org/W6680235470","https://openalex.org/W6746866138","https://openalex.org/W6761496496"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W2024136090","https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W3099153698"],"abstract_inverted_index":{"There":[0],"have":[1],"been":[2],"many":[3],"recent":[4],"advances":[5],"in":[6,26,157,183],"the":[7,22,27,33,36,44,50,60,64,152,168,180,201],"Traffic":[8],"Signal":[9],"Control":[10],"literature":[11],"that":[12,84,195],"use":[13],"reinforcement":[14],"learning,":[15],"most":[16],"of":[17,98,125,148,164,185],"which":[18,178,211],"is":[19,167,212],"undertaken":[20],"using":[21],"model-free":[23,28,153],"approach.":[24],"Approaches":[25],"domain,":[29],"attempt":[30],"to":[31,42,58,76,136,144,170],"learn":[32,43],"value":[34,51,61],"or":[35],"policy":[37],"function":[38,52],"directly":[39],"without":[40],"attempting":[41],"environment":[45],"transition":[46],"dynamics.":[47],"Therefore,":[48],"training":[49],"under":[53],"a":[54,89,127,145,213],"specified":[55],"dynamics":[56,91,99],"fails":[57],"differentiate":[59],"updates":[62],"from":[63],"underlying":[65],"dynamics,":[66],"making":[67],"these":[68,197],"methods":[69],"require":[70],"much":[71,104],"larger":[72],"agent-environment":[73],"interaction":[74],"data":[75],"generalize":[77],"over":[78],"different":[79],"scenarios.":[80],"In":[81],"contrast,":[82],"approaches":[83,166],"optimize":[85,137],"agent":[86],"actions":[87],"w.r.t.":[88],"learned":[90,202],"model":[92,130,203],"inherently":[93],"avoid":[94],"this":[95,112,121],"tight":[96],"coupling":[97],"and":[100,131,188,209],"value,":[101],"allowing":[102],"for":[103,216],"faster":[105],"adaptation":[106],"as":[107],"traffic":[108,149],"scenarios":[109],"change.":[110],"For":[111],"work":[113],"on":[114],"single":[115],"intersection":[116,189],"control,":[117],"we":[118,193],"specifically":[119],"adopt":[120],"latter":[122],"model-based":[123,165],"approach":[124,141,207],"learning":[126],"microscopic":[128],"simulator":[129],"then":[132],"apply":[133],"tree-search":[134],"techniques":[135],"control":[138,171,198],"actions.":[139],"This":[140],"quickly":[142],"generalizes":[143],"diverse":[146],"set":[147],"demands,":[150],"whereas":[151],"method":[154,181],"performs":[155],"suboptimally":[156],"conditions":[158],"unseen":[159,176],"during":[160],"training.":[161],"Another":[162],"benefit":[163],"ability":[169],"new":[172],"intersections":[173],"with":[174,200],"previously":[175],"topologies,":[177],"makes":[179,205],"transferable":[182],"terms":[184],"both":[186],"demand":[187],"structure":[190],"variation.":[191],"Finally,":[192],"observe":[194],"pairing":[196],"strategies":[199],"also":[204],"our":[206],"debuggable":[208],"explainable,":[210],"critical":[214],"requirement":[215],"real-world":[217],"deployment.":[218]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
