{"id":"https://openalex.org/W7133228771","doi":"https://doi.org/10.48550/arxiv.2602.23896","title":"TSC: Topology-Conditioned Stackelberg Coordination for Multi-Agent Reinforcement Learning in Interactive Driving","display_name":"TSC: Topology-Conditioned Stackelberg Coordination for Multi-Agent Reinforcement Learning in Interactive Driving","publication_year":2026,"publication_date":"2026-02-27","ids":{"openalex":"https://openalex.org/W7133228771","doi":"https://doi.org/10.48550/arxiv.2602.23896"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.23896","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127821385","display_name":"Xiaotong Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Xiaotong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127791693","display_name":"Gang Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Gang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127811692","display_name":"Yuanjing Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yuanjing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048468667","display_name":"Siyu Teng","orcid":"https://orcid.org/0000-0002-6860-9547"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Teng, Siyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127797342","display_name":"Alois Knoll","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Knoll, Alois","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127839217","display_name":"Long Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Long","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5127821385"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.6341999769210815,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.6341999769210815,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.1834000051021576,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.11469999700784683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stackelberg-competition","display_name":"Stackelberg competition","score":0.8014000058174133},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6782000064849854},{"id":"https://openalex.org/keywords/weaving","display_name":"Weaving","score":0.5024999976158142},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.42590001225471497},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4156000018119812},{"id":"https://openalex.org/keywords/decentralised-system","display_name":"Decentralised system","score":0.3953000009059906},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.37389999628067017},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.3736000061035156}],"concepts":[{"id":"https://openalex.org/C199510392","wikidata":"https://www.wikidata.org/wiki/Q1184602","display_name":"Stackelberg competition","level":2,"score":0.8014000058174133},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6895999908447266},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6782000064849854},{"id":"https://openalex.org/C54525549","wikidata":"https://www.wikidata.org/wiki/Q2553445","display_name":"Weaving","level":2,"score":0.5024999976158142},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.42590001225471497},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4156000018119812},{"id":"https://openalex.org/C205875254","wikidata":"https://www.wikidata.org/wiki/Q17156857","display_name":"Decentralised system","level":3,"score":0.3953000009059906},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.37389999628067017},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.3736000061035156},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3727000057697296},{"id":"https://openalex.org/C2780864053","wikidata":"https://www.wikidata.org/wiki/Q5147495","display_name":"Collision avoidance","level":3,"score":0.34940001368522644},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3483000099658966},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.34389999508857727},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3353999853134155},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.32429999113082886},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.3172999918460846},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31200000643730164},{"id":"https://openalex.org/C194995250","wikidata":"https://www.wikidata.org/wiki/Q531136","display_name":"Affordance","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C107257861","wikidata":"https://www.wikidata.org/wiki/Q656316","display_name":"Coordination game","level":2,"score":0.29159998893737793},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.25029999017715454}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.23896","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.23896","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.23896","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.23896","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.7951626181602478,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Safe":[0],"and":[1,23,35,142,157,172,205],"efficient":[2],"autonomous":[3],"driving":[4,96],"in":[5,174],"dense":[6,132,175,180],"traffic":[7,77,181,203],"is":[8],"fundamentally":[9],"a":[10,90,102,120,147],"decentralized":[11,94,143],"multi-agent":[12,55],"coordination":[13,149],"problem,":[14],"where":[15],"interactions":[16,133],"at":[17],"conflict":[18],"points":[19],"such":[20,47],"as":[21,48,76],"merging":[22],"weaving":[24,109],"must":[25],"be":[26],"resolved":[27],"reliably":[28],"under":[29,97,139],"partial":[30],"observability.":[31],"With":[32],"only":[33],"local":[34,115,166],"incomplete":[36],"cues,":[37],"interaction":[38],"patterns":[39],"can":[40],"change":[41],"rapidly,":[42],"often":[43],"causing":[44],"unstable":[45],"behaviors":[46],"oscillatory":[49],"yielding":[50],"or":[51,67],"unsafe":[52],"commitments.":[53],"Existing":[54],"reinforcement":[56],"learning":[57,91,163],"(MARL)":[58],"approaches":[59],"either":[60],"adopt":[61],"synchronous":[62],"decision-making,":[63],"which":[64,100],"exacerbate":[65],"non-stationarity,":[66],"depend":[68],"on":[69,126],"centralized":[70,140],"sequencing":[71],"mechanisms":[72],"that":[73,151,184],"scale":[74],"poorly":[75],"density":[78],"increases.":[79],"To":[80],"address":[81],"these":[82],"limitations,":[83],"we":[84],"propose":[85],"Topology-conditioned":[86],"Stackelberg":[87,136],"Coordination":[88],"(TSC),":[89],"framework":[92],"for":[93],"interactive":[95],"communication-free":[98],"execution,":[99],"extracts":[101],"time-varying":[103],"directed":[104],"priority":[105],"graph":[106],"from":[107],"braid-inspired":[108],"relations":[110],"between":[111],"trajectories,":[112],"thereby":[113],"defining":[114],"leader-follower":[116],"dependencies":[117],"without":[118],"constructing":[119],"global":[121],"order":[122],"of":[123],"play.":[124],"Conditioned":[125],"this":[127],"graph,":[128],"TSC":[129,185],"endogenously":[130],"factorizes":[131],"into":[134],"graph-local":[135],"subgames":[137],"and,":[138],"training":[141,170],"execution":[144],"(CTDE),":[145],"learns":[146],"sequential":[148],"policy":[150],"anticipates":[152],"leaders":[153],"via":[154],"action":[155],"prediction":[156],"trains":[158],"followers":[159],"through":[160],"action-conditioned":[161],"value":[162],"to":[164],"approximate":[165],"best":[167],"responses,":[168],"improving":[169],"stability":[171],"safety":[173],"traffic.":[176],"Experiments":[177],"across":[178,193],"four":[179],"scenarios":[182],"show":[183],"achieves":[186],"superior":[187],"performance":[188],"over":[189],"representative":[190],"MARL":[191],"baselines":[192],"key":[194],"metrics,":[195],"most":[196],"notably":[197],"reducing":[198],"collisions":[199],"while":[200],"maintaining":[201],"competitive":[202],"efficiency":[204],"control":[206],"smoothness.":[207]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-03T00:00:00"}
