{"id":"https://openalex.org/W7147197057","doi":"https://doi.org/10.48550/arxiv.2603.27287","title":"Uni-World VLA: Interleaved World Modeling and Planning for Autonomous Driving","display_name":"Uni-World VLA: Interleaved World Modeling and Planning for Autonomous Driving","publication_year":2026,"publication_date":"2026-03-28","ids":{"openalex":"https://openalex.org/W7147197057","doi":"https://doi.org/10.48550/arxiv.2603.27287"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.27287","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27287","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.27287","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042399424","display_name":"Qiqi Liu","orcid":"https://orcid.org/0000-0003-1587-5515"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Qiqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132606508","display_name":"Huan Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Huan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132651649","display_name":"Jingyu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jingyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132634844","display_name":"Bin Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Bin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132578936","display_name":"Zhihui Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao, Zhihui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085770559","display_name":"Dan She","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"She, Dangen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132710609","display_name":"Xiatian Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Xiatian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132696087","display_name":"Li Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Li","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5042399424"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9577999711036682,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9577999711036682,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.006899999920278788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.006599999964237213,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.6072999835014343},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5965999960899353},{"id":"https://openalex.org/keywords/plan","display_name":"Plan (archaeology)","score":0.5885000228881836},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5683000087738037},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5307000279426575},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.45489999651908875}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7160000205039978},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.6072999835014343},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5965999960899353},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.5885000228881836},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5683000087738037},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5654000043869019},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5307000279426575},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.45489999651908875},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.36910000443458557},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3619999885559082},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3179999887943268},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31189998984336853},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C131584629","wikidata":"https://www.wikidata.org/wiki/Q4308705","display_name":"Coupling (piping)","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2761000096797943},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.2662000060081482},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2619999945163727}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.27287","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27287","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.27287","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27287","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.43413063883781433}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Autonomous":[0],"driving":[1,169],"requires":[2],"reasoning":[3],"about":[4],"how":[5],"the":[6,32,87,134],"environment":[7],"evolves":[8],"and":[9,21,54,73,101,160],"planning":[10,80,144,161],"actions":[11,75],"accordingly.":[12],"Existing":[13],"world-model-based":[14],"approaches":[15],"typically":[16],"predict":[17],"future":[18,51,71,89,149],"scenes":[19],"first":[20],"plan":[22],"afterwards,":[23],"resulting":[24],"in":[25,107],"open-loop":[26],"imagination":[27],"that":[28,48,138,155],"may":[29],"drift":[30],"from":[31],"actual":[33],"decision":[34],"process.":[35],"In":[36,111],"this":[37],"paper,":[38],"we":[39,113],"present":[40],"Uni-World":[41],"VLA,":[42],"a":[43,60,95,163],"unified":[44],"vision-language-action":[45],"(VLA)":[46],"model":[47,67],"tightly":[49,156],"interleaves":[50],"frame":[52,150],"prediction":[53,159],"trajectory":[55],"planning.":[56],"Instead":[57],"of":[58],"generating":[59],"full":[61],"world":[62,99,126,158],"rollout":[63],"before":[64],"planning,":[65],"our":[66,139],"alternates":[68],"between":[69,98],"predicting":[70],"frames":[72,119],"ego":[74],"step":[76],"by":[77],"step,":[78],"allowing":[79],"decisions":[81],"to":[82,120],"be":[83],"continuously":[84],"conditioned":[85],"on":[86,133],"imagined":[88],"observations.":[90],"This":[91],"interleaved":[92],"generation":[93],"forms":[94],"closed-loop":[96,143],"interaction":[97],"modeling":[100],"control,":[102],"enabling":[103],"more":[104],"adaptive":[105],"decision-making":[106],"dynamic":[108],"traffic":[109],"scenarios.":[110],"addition,":[112],"incorporate":[114],"monocular":[115],"depth":[116],"information":[117],"into":[118],"provide":[121],"stronger":[122],"geometric":[123],"cues":[124],"for":[125,166],"modeling,":[127],"improving":[128],"long-horizon":[129],"scene":[130],"prediction.":[131],"Experiments":[132],"NAVSIM":[135],"benchmark":[136],"show":[137],"approach":[140],"achieves":[141],"competitive":[142],"performance":[145],"while":[146],"producing":[147],"high-fidelity":[148],"predictions.":[151],"These":[152],"results":[153],"demonstrate":[154],"coupling":[157],"is":[162],"promising":[164],"direction":[165],"scalable":[167],"VLA":[168],"systems.":[170]},"counts_by_year":[],"updated_date":"2026-04-02T13:53:19.096889","created_date":"2026-04-02T00:00:00"}
