{"id":"https://openalex.org/W4416525310","doi":"https://doi.org/10.1109/iccv51701.2025.02527","title":"Epona: Autoregressive Diffusion World Model for Autonomous Driving","display_name":"Epona: Autoregressive Diffusion World Model for Autonomous Driving","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416525310","doi":"https://doi.org/10.1109/iccv51701.2025.02527"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.02527","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02527","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2506.24113","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101569300","display_name":"Kaiwen Zhang","orcid":"https://orcid.org/0009-0001-4765-0831"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Kaiwen Zhang","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022780959","display_name":"Zhenyu Tang","orcid":"https://orcid.org/0000-0001-5544-9727"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Zhenyu Tang","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069317373","display_name":"Xiaotao Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaotao Hu","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052549072","display_name":"Xingang Pan","orcid":"https://orcid.org/0000-0002-5825-9467"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xingang Pan","raw_affiliation_strings":["Nanyang Technological University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036318763","display_name":"Xiaoyang Guo","orcid":"https://orcid.org/0000-0002-2122-1510"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoyang Guo","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100390952","display_name":"Yuan Liu","orcid":"https://orcid.org/0000-0003-4247-3562"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yuan Liu","raw_affiliation_strings":["Hong Kong University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029366714","display_name":"Jingwei Huang","orcid":"https://orcid.org/0000-0002-9116-4929"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingwei Huang","raw_affiliation_strings":["Tencent Hunyuan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Hunyuan","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Li Yuan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128628","display_name":"Peking University Shenzhen Hospital","ror":"https://ror.org/03kkjyb15","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210128628"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Yuan","raw_affiliation_strings":["Shenzhen Gruaduate School, Peking University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen Gruaduate School, Peking University","institution_ids":["https://openalex.org/I4210128628"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100401625","display_name":"Qian Zhang","orcid":"https://orcid.org/0000-0002-2751-9940"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Qian Zhang","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072177890","display_name":"Xiaoxiao Long","orcid":"https://orcid.org/0000-0002-3386-8805"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao-Xiao Long","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040959467","display_name":"Xun Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xun Cao","raw_affiliation_strings":["Nanjing University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100777411","display_name":"Wei Yin","orcid":"https://orcid.org/0000-0002-4349-8297"},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Yin","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31583884,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"27220","last_page":"27230"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6079000234603882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6079000234603882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.08089999854564667,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.06109999865293503,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.7592999935150146},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.6901999711990356},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5794000029563904},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.578000009059906},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5336999893188477},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4830999970436096},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.3644999861717224},{"id":"https://openalex.org/keywords/time-series","display_name":"Time series","score":0.3472000062465668}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.7592999935150146},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7102000117301941},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.6901999711990356},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5794000029563904},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.578000009059906},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5336999893188477},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5214999914169312},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4830999970436096},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.3644999861717224},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35030001401901245},{"id":"https://openalex.org/C151406439","wikidata":"https://www.wikidata.org/wiki/Q186588","display_name":"Time series","level":2,"score":0.3472000062465668},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3222000002861023},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.3158000111579895},{"id":"https://openalex.org/C172849965","wikidata":"https://www.wikidata.org/wiki/Q3148875","display_name":"Reference frame","level":3,"score":0.3131999969482422},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3109000027179718},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.3025999963283539},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.2971000075340271},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.29249998927116394},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.28619998693466187},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.2711000144481659},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2639999985694885},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.02527","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02527","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2506.24113","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.24113","pdf_url":"https://arxiv.org/pdf/2506.24113","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2506.24113","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2506.24113","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2506.24113","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.24113","pdf_url":"https://arxiv.org/pdf/2506.24113","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diffusion":[0],"models":[1,23,39],"have":[2],"demonstrated":[3],"exceptional":[4],"visual":[5,107],"quality":[6],"in":[7,109,130],"video":[8,20,37,99],"generation,":[9,93],"making":[10],"them":[11],"promising":[12],"for":[13],"autonomous":[14],"driving":[15],"world":[16,22,68,92,153],"modeling.":[17],"However,":[18],"existing":[19],"diffusion-based":[21],"struggle":[24],"with":[25,106,138],"flexible-length,":[26],"long-horizon":[27],"predictions":[28],"and":[29,94,98,142],"integrating":[30],"trajectory":[31,97],"planning.":[32],"This":[33],"is":[34],"because":[35],"conventional":[36],"diffusion":[38,67],"rely":[40],"on":[41,166],"global":[42],"joint":[43],"distribution":[44,74],"modeling":[45,75,88,108],"of":[46],"fixed-length":[47],"frame":[48],"sequences":[49],"rather":[50],"than":[51],"sequentially":[52],"constructing":[53],"localized":[54,72],"distributions":[55],"at":[56,174],"each":[57],"timestep.":[58],"In":[59],"this":[60],"work,":[61],"we":[62],"propose":[63],"Epona,":[64],"an":[65,110],"autoregressive":[66,131],"model":[69,154],"that":[70,84,101],"enables":[71,115],"spatiotemporal":[73,82],"through":[76],"two":[77],"key":[78],"innovations:":[79],"1)":[80],"Decoupled":[81],"factorization":[83],"separates":[85],"temporal":[86],"dynamics":[87],"from":[89],"fine-grained":[90],"future":[91],"2)":[95],"Modular":[96],"prediction":[100,145],"seamlessly":[102],"integrate":[103],"motion":[104,160],"planning":[105],"end-to-end":[111,164],"framework.":[112],"Our":[113],"architecture":[114],"high-resolution,":[116],"long-duration":[117],"generation":[118],"while":[119],"introducing":[120],"a":[121,158],"novel":[122],"chain-of-forward":[123],"training":[124],"strategy":[125],"to":[126,148],"address":[127],"error":[128],"accumulation":[129],"loops.":[132],"Experimental":[133],"results":[134],"demonstrate":[135],"state-of-the-art":[136],"performance":[137],"7.4\\%":[139],"FVD":[140],"improvement":[141],"minutes":[143],"longer":[144],"duration":[146],"compared":[147],"prior":[149],"works.":[150],"The":[151],"learned":[152],"further":[155],"serves":[156],"as":[157],"real-time":[159],"planner,":[161],"outperforming":[162],"strong":[163],"planners":[165],"NAVSIM":[167],"benchmarks.":[168],"Code":[169],"will":[170],"be":[171],"publicly":[172],"available":[173],"\\href{https://github.com/Kevin-thu/Epona/}{https://github.com/Kevin-thu/Epona/}.":[175]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
