{"id":"https://openalex.org/W7140080798","doi":"https://doi.org/10.48550/arxiv.2603.19979","title":"X-World: Controllable Ego-Centric Multi-Camera World Models for Scalable End-to-End Driving","display_name":"X-World: Controllable Ego-Centric Multi-Camera World Models for Scalable End-to-End Driving","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W7140080798","doi":"https://doi.org/10.48550/arxiv.2603.19979"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.19979","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19979","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.19979","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015757467","display_name":"Chaoda Zheng","orcid":"https://orcid.org/0000-0001-9347-4181"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zheng, Chaoda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130361031","display_name":"Sean Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Sean","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024719468","display_name":"Jinhao Deng","orcid":"https://orcid.org/0000-0003-1271-5532"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Jinhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101874417","display_name":"Zhennan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhennan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130335202","display_name":"Shijia Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Shijia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101438489","display_name":"Liqiang Xiao","orcid":"https://orcid.org/0000-0002-8961-0010"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Liqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130404713","display_name":"Ziheng Chi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chi, Ziheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130369495","display_name":"Hongbin Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Hongbin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002393832","display_name":"Kangjie Chen","orcid":"https://orcid.org/0009-0007-2454-4976"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Kangjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130357384","display_name":"Boyang Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Boyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130410471","display_name":"Yu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130402726","display_name":"Xianming Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xianming","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5015757467"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5282999873161316,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5282999873161316,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.20080000162124634,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.1046999990940094,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6614000201225281},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5954999923706055},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4648999869823456},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.4514000117778778},{"id":"https://openalex.org/keywords/controllability","display_name":"Controllability","score":0.4499000012874603},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.42820000648498535},{"id":"https://openalex.org/keywords/drone","display_name":"Drone","score":0.36239999532699585},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.3458999991416931}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7670999765396118},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6614000201225281},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5954999923706055},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.5059000253677368},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4648999869823456},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.4514000117778778},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.4499000012874603},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.42820000648498535},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3889999985694885},{"id":"https://openalex.org/C59519942","wikidata":"https://www.wikidata.org/wiki/Q650665","display_name":"Drone","level":2,"score":0.36239999532699585},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.3458999991416931},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.3422999978065491},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3197000026702881},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3190999925136566},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.304500013589859},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2928999960422516},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.29190000891685486},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.27720001339912415},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.2547000050544739}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.19979","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19979","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.19979","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19979","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.4691084623336792,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scalable":[0],"and":[1,45,67,94,113,126,130,140,163,184,214,222,238],"reliable":[2],"evaluation":[3,28],"is":[4,38,171],"increasingly":[5],"critical":[6],"in":[7,86],"the":[8,107,160,167],"end-to-end":[9],"era":[10],"of":[11,142,169],"autonomous":[12],"driving,":[13],"where":[14],"vision--language--action":[15],"(VLA)":[16],"policies":[17],"directly":[18,85],"map":[19],"raw":[20],"sensor":[21],"streams":[22,104],"to":[23,47,178,225],"driving":[24],"actions.":[25,109],"Yet,":[26],"current":[27],"pipelines":[29],"still":[30],"rely":[31],"heavily":[32],"on":[33,155],"real-world":[34,53],"road":[35,128],"testing,":[36],"which":[37],"costly,":[39],"biased":[40],"toward":[41],"limited":[42],"scenario":[43],"coverage,":[44],"difficult":[46],"reproduce.":[48],"These":[49,229],"challenges":[50],"motivate":[51],"a":[52,95,132,172,233],"simulator":[54],"that":[55,81,105,193],"can":[56],"generate":[57],"realistic":[58],"future":[59,83,96,101],"observations":[60,84],"under":[61,187],"proposed":[62],"actions,":[63],"while":[64,158],"remaining":[65],"controllable":[66],"stable":[68,208],"over":[69,122,211],"long":[70,212],"horizons.":[71],"We":[72],"present":[73],"X-World,":[74],"an":[75],"action-conditioned":[76],"multi-camera":[77,102],"generative":[78],"world":[79,145],"model":[80],"simulates":[82],"video":[87,103,150,175,198],"space.":[88],"Given":[89],"synchronized":[90],"multi-view":[91,173,197],"camera":[92],"history":[93],"action":[97,162,220],"sequence,":[98],"X-World":[99,117,147,170,194,232],"generates":[100],"follow":[106],"commanded":[108],"To":[110],"ensure":[111],"reproducible":[112,239],"editable":[114],"scene":[115,164,227],"rollouts,":[116,213],"further":[118],"supports":[119],"optional":[120,226],"controls":[121],"dynamic":[123],"traffic":[124],"agents":[125],"static":[127],"elements,":[129],"retains":[131],"text-prompt":[133],"interface":[134],"for":[135,236],"appearance-level":[136],"control":[137,189],"(e.g.,":[138],"weather":[139],"time":[141],"day).":[143],"Beyond":[144],"simulation,":[146],"also":[148],"enables":[149],"style":[151],"transfer":[152],"by":[153],"conditioning":[154],"appearance":[156],"prompts":[157],"preserving":[159],"underlying":[161],"dynamics.":[165],"At":[166],"core":[168],"latent":[174],"generator":[176],"designed":[177],"explicitly":[179],"encourage":[180],"cross-view":[181],"geometric":[182],"consistency":[183,204],"temporal":[185,209],"coherence":[186],"diverse":[188],"signals.":[190],"Experiments":[191],"show":[192],"achieves":[195],"high-quality":[196],"generation":[199],"with":[200,218],"(i)":[201],"strong":[202],"view":[203],"across":[205],"cameras,":[206],"(ii)":[207],"dynamics":[210],"(iii)":[215],"high":[216],"controllability":[217],"strict":[219],"following":[221],"faithful":[223],"adherence":[224],"controls.":[228],"properties":[230],"make":[231],"practical":[234],"foundation":[235],"scalable":[237],"evaluation.":[240]},"counts_by_year":[],"updated_date":"2026-03-24T06:04:31.470712","created_date":"2026-03-24T00:00:00"}
