{"id":"https://openalex.org/W7141011382","doi":"https://doi.org/10.48550/arxiv.2603.24989","title":"Learning Rollout from Sampling:An R1-Style Tokenized Traffic Simulation Model","display_name":"Learning Rollout from Sampling:An R1-Style Tokenized Traffic Simulation Model","publication_year":2026,"publication_date":"2026-03-26","ids":{"openalex":"https://openalex.org/W7141011382","doi":"https://doi.org/10.48550/arxiv.2603.24989"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.24989","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24989","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.24989","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130735573","display_name":"Ziyan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Ziyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130808802","display_name":"Peng Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Peng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130758888","display_name":"Ding Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ding","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130755890","display_name":"Chiwei Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Chiwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130737825","display_name":"Qichao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Qichao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130775269","display_name":"Zhongpu Xia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Zhongpu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130778706","display_name":"Guizhen Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Guizhen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5130735573"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.5444999933242798,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.5444999933242798,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.08410000056028366,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.053300000727176666,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7418000102043152},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4675999879837036},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4641999900341034},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.42089998722076416},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4047999978065491},{"id":"https://openalex.org/keywords/adaptive-sampling","display_name":"Adaptive sampling","score":0.3968999981880188},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.38280001282691956}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7418000102043152},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7268000245094299},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4675999879837036},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4641999900341034},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4602999985218048},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43130001425743103},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.42089998722076416},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4047999978065491},{"id":"https://openalex.org/C2781395549","wikidata":"https://www.wikidata.org/wiki/Q4680762","display_name":"Adaptive sampling","level":3,"score":0.3968999981880188},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.38280001282691956},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3321000039577484},{"id":"https://openalex.org/C121704057","wikidata":"https://www.wikidata.org/wiki/Q352070","display_name":"Collision","level":2,"score":0.328900009393692},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.3151000142097473},{"id":"https://openalex.org/C2778391309","wikidata":"https://www.wikidata.org/wiki/Q7832527","display_name":"Traffic simulation","level":3,"score":0.30820000171661377},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3010999858379364},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2964000105857849},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.2750999927520752},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.24989","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24989","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.24989","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24989","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Learning":[0],"diverse":[1,159,171],"and":[2,35,100,162,170],"high-fidelity":[3],"traffic":[4,33,81],"simulations":[5],"from":[6],"human":[7],"driving":[8,14],"demonstrations":[9],"is":[10],"crucial":[11],"for":[12,64],"autonomous":[13],"evaluation.":[15],"The":[16],"recent":[17],"next-token":[18],"prediction":[19],"(NTP)":[20],"paradigm,":[21],"widely":[22],"adopted":[23],"in":[24,55,167],"large":[25],"language":[26],"models":[27],"(LLMs),":[28],"has":[29],"been":[30],"applied":[31],"to":[32,90,189],"simulation":[34,82,110],"achieves":[36,185],"iterative":[37],"improvements":[38],"via":[39],"supervised":[40],"fine-tuning":[41],"(SFT).":[42],"However,":[43],"such":[44],"methods":[45],"limit":[46],"active":[47],"exploration":[48,66],"of":[49,105],"potentially":[50],"valuable":[51],"motion":[52,69,96,107,125,136],"tokens,":[53],"particularly":[54],"suboptimal":[56],"regions.":[57],"Entropy":[58],"patterns":[59],"provide":[60],"a":[61,78,146,154],"promising":[62],"perspective":[63],"enabling":[65],"driven":[67],"by":[68,73,145],"token":[70,97],"uncertainty.":[71],"Motivated":[72],"this":[74],"insight,":[75],"we":[76,113],"propose":[77],"novel":[79],"tokenized":[80],"policy,":[83],"R1Sim,":[84],"which":[85],"represents":[86],"an":[87,115],"initial":[88],"attempt":[89],"explore":[91],"reinforcement":[92],"learning":[93],"based":[94],"on":[95,109,122,176],"entropy":[98],"patterns,":[99],"systematically":[101],"analyzes":[102],"the":[103,177],"impact":[104],"different":[106],"tokens":[108,126],"outcomes.":[111],"Specifically,":[112],"introduce":[114],"entropy-guided":[116],"adaptive":[117],"sampling":[118,161],"mechanism":[119],"that":[120,183],"focuses":[121],"previously":[123],"overlooked":[124],"with":[127],"high":[128,131],"uncertainty":[129],"yet":[130],"potential.":[132],"We":[133],"further":[134],"optimize":[135],"behaviors":[137],"using":[138],"Group":[139],"Relative":[140],"Policy":[141],"Optimization":[142],"(GRPO),":[143],"guided":[144],"safety-aware":[147],"reward":[148],"design.":[149],"Overall,":[150],"these":[151],"components":[152],"enable":[153],"balanced":[155],"exploration-exploitation":[156],"trade-off":[157],"through":[158],"high-uncertainty":[160],"group-wise":[163],"comparative":[164],"estimation,":[165],"resulting":[166],"realistic,":[168],"safe,":[169],"multi-agent":[172],"behaviors.":[173],"Extensive":[174],"experiments":[175],"Waymo":[178],"Sim":[179],"Agent":[180],"benchmark":[181],"demonstrate":[182],"R1Sim":[184],"competitive":[186],"performance":[187],"compared":[188],"state-of-the-art":[190],"methods.":[191]},"counts_by_year":[],"updated_date":"2026-03-28T06:16:51.555046","created_date":"2026-03-28T00:00:00"}
