{"id":"https://openalex.org/W7133217464","doi":"https://doi.org/10.48550/arxiv.2602.24289","title":"Mode Seeking meets Mean Seeking for Fast Long Video Generation","display_name":"Mode Seeking meets Mean Seeking for Fast Long Video Generation","publication_year":2026,"publication_date":"2026-02-27","ids":{"openalex":"https://openalex.org/W7133217464","doi":"https://doi.org/10.48550/arxiv.2602.24289"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.24289","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.24289","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.24289","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008937103","display_name":"Shengqu Cai","orcid":"https://orcid.org/0000-0002-3063-9644"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cai, Shengqu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017089261","display_name":"Weili Nie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nie, Weili","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127806776","display_name":"Chao Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Chao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127852248","display_name":"Julius Berner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Berner, Julius","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079385579","display_name":"Lvmin Zhang","orcid":"https://orcid.org/0000-0003-3503-5791"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Lvmin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113099534","display_name":"Nanye Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Nanye","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127786780","display_name":"Hansheng Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Hansheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045835385","display_name":"Maneesh Agrawala","orcid":"https://orcid.org/0000-0002-8996-7327"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agrawala, Maneesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127858191","display_name":"Leonidas Guibas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guibas, Leonidas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122847770","display_name":"Gordon Wetzstein","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wetzstein, Gordon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127833375","display_name":"Arash Vahdat","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vahdat, Arash","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5008937103"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.814300000667572,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.814300000667572,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.04699999839067459,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.018400000408291817,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.6603999733924866},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4950000047683716},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4383000135421753},{"id":"https://openalex.org/keywords/mode","display_name":"Mode (computer interface)","score":0.4207000136375427},{"id":"https://openalex.org/keywords/decoupling","display_name":"Decoupling (probability)","score":0.41850000619888306},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.41530001163482666},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.3912999927997589},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.3409000039100647}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.720300018787384},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.6603999733924866},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5766000151634216},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5216000080108643},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4950000047683716},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4383000135421753},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.4207000136375427},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.41850000619888306},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.41530001163482666},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.3912999927997589},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.3409000039100647},{"id":"https://openalex.org/C38349280","wikidata":"https://www.wikidata.org/wiki/Q1434290","display_name":"Flow (mathematics)","level":2,"score":0.3292999863624573},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.31679999828338623},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2904999852180481},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.28630000352859497},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.2687000036239624},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.257099986076355},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.24289","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.24289","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.24289","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.24289","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6984317302703857,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scaling":[0],"video":[1,144],"generation":[2],"from":[3,45,113],"seconds":[4],"to":[5,25,73,89,133],"minutes":[6],"faces":[7],"a":[8,33,50,54,61,80,90,95,134,140],"critical":[9],"bottleneck:":[10],"while":[11,77,121],"short-video":[12,92,136],"data":[13,20],"is":[14,21],"abundant":[15],"and":[16,23,111,162],"high-fidelity,":[17],"coherent":[18],"long-form":[19],"scarce":[22],"limited":[24,114],"narrow":[26],"domains.":[27],"To":[28],"address":[29],"this,":[30],"we":[31],"propose":[32],"training":[34],"paradigm":[35],"where":[36],"Mode":[37],"Seeking":[38],"meets":[39],"Mean":[40],"Seeking,":[41],"decoupling":[42],"local":[43,81,123,159],"fidelity":[44],"long-term":[46],"coherence":[47,110],"based":[48],"on":[49,70],"unified":[51],"representation":[52],"via":[53,67,94,117],"Decoupled":[55],"Diffusion":[56],"Transformer.":[57],"Our":[58],"approach":[59],"utilizes":[60],"global":[62],"Flow":[63],"Matching":[64,83],"head":[65,84],"trained":[66],"supervised":[68,118],"learning":[69],"long":[71,115,143],"videos":[72,106,116],"capture":[74],"narrative":[75],"structure,":[76],"simultaneously":[78],"employing":[79],"Distribution":[82],"that":[85,107,148],"aligns":[86],"sliding":[87],"windows":[88],"frozen":[91,135],"teacher":[93],"mode-seeking":[96],"reverse-KL":[97],"divergence.":[98],"This":[99],"strategy":[100],"enables":[101],"the":[102,131,153],"synthesis":[103],"of":[104,130],"minute-scale":[105],"learns":[108],"long-range":[109,163],"motions":[112],"flow":[119],"matching,":[120],"inheriting":[122],"realism":[124],"by":[125,156],"aligning":[126],"every":[127],"sliding-window":[128],"segment":[129],"student":[132],"teacher,":[137],"resulting":[138],"in":[139],"few-step":[141],"fast":[142],"generator.":[145],"Evaluations":[146],"show":[147],"our":[149],"method":[150],"effectively":[151],"closes":[152],"fidelity-horizon":[154],"gap":[155],"jointly":[157],"improving":[158],"sharpness,":[160],"motion":[161],"consistency.":[164],"Project":[165],"website:":[166],"https://primecai.github.io/mmm/.":[167]},"counts_by_year":[],"updated_date":"2026-03-03T06:18:10.843953","created_date":"2026-03-03T00:00:00"}
