{"id":"https://openalex.org/W7127357935","doi":"https://doi.org/10.48550/arxiv.2602.02214","title":"Causal Forcing: Autoregressive Diffusion Distillation Done Right for High-Quality Real-Time Interactive Video Generation","display_name":"Causal Forcing: Autoregressive Diffusion Distillation Done Right for High-Quality Real-Time Interactive Video Generation","publication_year":2026,"publication_date":"2026-02-02","ids":{"openalex":"https://openalex.org/W7127357935","doi":"https://doi.org/10.48550/arxiv.2602.02214"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.02214","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.02214","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.02214","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124880506","display_name":"Hongzhou Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhu, Hongzhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124877793","display_name":"Min Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Min","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124942354","display_name":"Guande He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Guande","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124874495","display_name":"Hang Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Hang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124915126","display_name":"Chongxuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Chongxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124967016","display_name":"Jun Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jun","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5124880506"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5210999846458435,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5210999846458435,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.13379999995231628,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.04610000178217888,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.656000018119812},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5813999772071838},{"id":"https://openalex.org/keywords/forcing","display_name":"Forcing (mathematics)","score":0.5809000134468079},{"id":"https://openalex.org/keywords/ode","display_name":"Ode","score":0.5274999737739563},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.4555000066757202},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.40059998631477356}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.755299985408783},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.656000018119812},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5813999772071838},{"id":"https://openalex.org/C197115733","wikidata":"https://www.wikidata.org/wiki/Q1003136","display_name":"Forcing (mathematics)","level":2,"score":0.5809000134468079},{"id":"https://openalex.org/C34862557","wikidata":"https://www.wikidata.org/wiki/Q178985","display_name":"Ode","level":2,"score":0.5274999737739563},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.4555000066757202},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41029998660087585},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.40950000286102295},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.40059998631477356},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3968000113964081},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.336899995803833},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.31940001249313354},{"id":"https://openalex.org/C2780310081","wikidata":"https://www.wikidata.org/wiki/Q1154312","display_name":"Video editing","level":2,"score":0.3066999912261963},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.28209999203681946},{"id":"https://openalex.org/C38349280","wikidata":"https://www.wikidata.org/wiki/Q1434290","display_name":"Flow (mathematics)","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27469998598098755},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.02214","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.02214","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.02214","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.02214","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"To":[0,97],"achieve":[1],"real-time":[2],"interactive":[3],"video":[4,11],"generation,":[5],"current":[6],"methods":[7],"distill":[8],"pretrained":[9],"bidirectional":[10,76],"diffusion":[12],"models":[13],"into":[14],"few-step":[15],"autoregressive":[16],"(AR)":[17],"models,":[18],"facing":[19],"an":[20,67,71,107],"architectural":[21,116],"gap":[22,38],"when":[23],"full":[24],"attention":[25],"is":[26],"replaced":[27],"by":[28,135],"causal":[29],"attention.":[30],"However,":[31],"existing":[32],"approaches":[33],"do":[34],"not":[35],"bridge":[36],"this":[37,79,99],"theoretically.":[39],"They":[40],"initialize":[41],"the":[42,64,84,115,131,151],"AR":[43,68,72,108],"student":[44,73],"via":[45],"ODE":[46,111],"distillation,":[47],"which":[48,94],"requires":[49],"frame-level":[50],"injectivity,":[51],"where":[52],"each":[53],"noisy":[54],"frame":[55,62],"must":[56],"map":[57,87],"to":[58],"a":[59,75,91],"unique":[60],"clean":[61],"under":[63],"PF-ODE":[65],"of":[66,83],"teacher.":[69],"Distilling":[70],"from":[74],"teacher":[77,109],"violates":[78],"condition,":[80],"preventing":[81],"recovery":[82],"teacher's":[85],"flow":[86],"and":[88,143,150],"instead":[89],"inducing":[90],"conditional-expectation":[92],"solution,":[93],"degrades":[95],"performance.":[96],"address":[98],"issue,":[100],"we":[101],"propose":[102],"Causal":[103],"Forcing":[104,134],"that":[105,121],"uses":[106],"for":[110],"initialization,":[112],"thereby":[113],"bridging":[114],"gap.":[117],"Empirical":[118],"results":[119],"show":[120],"our":[122],"method":[123],"outperforms":[124],"all":[125,128],"baselines":[126],"across":[127],"metrics,":[129],"surpassing":[130],"SOTA":[132],"Self":[133],"19.3\\%":[136],"in":[137,141,145],"Dynamic":[138],"Degree,":[139],"8.7\\%":[140],"VisionReward,":[142],"16.7\\%":[144],"Instruction":[146],"Following.":[147],"Project":[148],"page":[149],"code:":[152],"\\href{https://thu-ml.github.io/CausalForcing.github.io/}{https://thu-ml.github.io/CausalForcing.github.io/}":[153]},"counts_by_year":[],"updated_date":"2026-02-04T23:14:21.375766","created_date":"2026-02-04T00:00:00"}
