{"id":"https://openalex.org/W4415540644","doi":"https://doi.org/10.1145/3746027.3755638","title":"DLFR-VAE: Dynamic Latent Frame Rate VAE for Video Generation","display_name":"DLFR-VAE: Dynamic Latent Frame Rate VAE for Video Generation","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415540644","doi":"https://doi.org/10.1145/3746027.3755638"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755638","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755638","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3746027.3755638","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101876815","display_name":"Zhihang Yuan","orcid":"https://orcid.org/0000-0001-7846-0240"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhihang Yuan","raw_affiliation_strings":["Tsinghua University, Beijing, China and Infinigence AI, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7846-0240","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China and Infinigence AI, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Siyuan Wang","orcid":"https://orcid.org/0009-0007-0934-1572"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siyuan Wang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0007-0934-1572","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113232966","display_name":"Yuzhang Shang","orcid":"https://orcid.org/0000-0003-2286-6668"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuzhang Shang","raw_affiliation_strings":["University of Central Florida, Orlando, USA"],"raw_orcid":"https://orcid.org/0000-0003-2286-6668","affiliations":[{"raw_affiliation_string":"University of Central Florida, Orlando, USA","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hanling Zhang","orcid":"https://orcid.org/0009-0006-7388-8145"},"institutions":[{"id":"https://openalex.org/I4210145118","display_name":"Infinitus (China)","ror":"https://ror.org/03pchte23","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210145118"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanling Zhang","raw_affiliation_strings":["Infinigence AI, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-7388-8145","affiliations":[{"raw_affiliation_string":"Infinigence AI, Beijing, China","institution_ids":["https://openalex.org/I4210145118"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113233261","display_name":"Tongcheng Fang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tongcheng Fang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-7279-8359","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Rui Xie","orcid":"https://orcid.org/0009-0008-1318-0924"},"institutions":[{"id":"https://openalex.org/I4210145118","display_name":"Infinitus (China)","ror":"https://ror.org/03pchte23","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210145118"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Xie","raw_affiliation_strings":["Infinigence AI, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0008-1318-0924","affiliations":[{"raw_affiliation_string":"Infinigence AI, Beijing, China","institution_ids":["https://openalex.org/I4210145118"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049910854","display_name":"Shengen Yan","orcid":"https://orcid.org/0009-0005-3858-7972"},"institutions":[{"id":"https://openalex.org/I4210145118","display_name":"Infinitus (China)","ror":"https://ror.org/03pchte23","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210145118"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengen Yan","raw_affiliation_strings":["Infinigence AI, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-3858-7972","affiliations":[{"raw_affiliation_string":"Infinigence AI, Beijing, China","institution_ids":["https://openalex.org/I4210145118"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Guohao Dai","orcid":"https://orcid.org/0009-0000-7000-6537"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guohao Dai","raw_affiliation_strings":["Shanghai Jiaotong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0000-7000-6537","affiliations":[{"raw_affiliation_string":"Shanghai Jiaotong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100445061","display_name":"Yu Wang","orcid":"https://orcid.org/0000-0001-6108-5157"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Wang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-6108-5157","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5101876815"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":2.1886,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.89944853,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"10388","last_page":"10397"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6114000082015991},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5296000242233276},{"id":"https://openalex.org/keywords/residual-frame","display_name":"Residual frame","score":0.5181999802589417},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5001999735832214},{"id":"https://openalex.org/keywords/frame-rate","display_name":"Frame rate","score":0.4722999930381775},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.45010000467300415},{"id":"https://openalex.org/keywords/reference-frame","display_name":"Reference frame","score":0.38420000672340393},{"id":"https://openalex.org/keywords/key-frame","display_name":"Key frame","score":0.3747999966144562},{"id":"https://openalex.org/keywords/inter-frame","display_name":"Inter frame","score":0.37369999289512634}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8185999989509583},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6114000082015991},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5710999965667725},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5296000242233276},{"id":"https://openalex.org/C204641915","wikidata":"https://www.wikidata.org/wiki/Q7315509","display_name":"Residual frame","level":4,"score":0.5181999802589417},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5001999735832214},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4754999876022339},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.4722999930381775},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.45010000467300415},{"id":"https://openalex.org/C172849965","wikidata":"https://www.wikidata.org/wiki/Q3148875","display_name":"Reference frame","level":3,"score":0.38420000672340393},{"id":"https://openalex.org/C2780139006","wikidata":"https://www.wikidata.org/wiki/Q1493902","display_name":"Key frame","level":3,"score":0.3747999966144562},{"id":"https://openalex.org/C39394851","wikidata":"https://www.wikidata.org/wiki/Q921594","display_name":"Inter frame","level":4,"score":0.37369999289512634},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3732999861240387},{"id":"https://openalex.org/C106030495","wikidata":"https://www.wikidata.org/wiki/Q1797012","display_name":"Video compression picture types","level":4,"score":0.36730000376701355},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.35760000348091125},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3312999904155731},{"id":"https://openalex.org/C94835093","wikidata":"https://www.wikidata.org/wiki/Q3113333","display_name":"Data compression ratio","level":5,"score":0.31779998540878296},{"id":"https://openalex.org/C65965080","wikidata":"https://www.wikidata.org/wiki/Q1806885","display_name":"Latent variable model","level":3,"score":0.31279999017715454},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.27230000495910645},{"id":"https://openalex.org/C167510206","wikidata":"https://www.wikidata.org/wiki/Q2835824","display_name":"Block-matching algorithm","level":4,"score":0.26600000262260437},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.25380000472068787},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2522999942302704},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.25}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755638","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755638","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3746027.3755638","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755638","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2134120396","https://openalex.org/W2169586400","https://openalex.org/W2900994596","https://openalex.org/W2962785568","https://openalex.org/W2990503944","https://openalex.org/W3035413240","https://openalex.org/W3155190816","https://openalex.org/W3180355996","https://openalex.org/W4252713891","https://openalex.org/W4312933868","https://openalex.org/W4386065704","https://openalex.org/W4389953010","https://openalex.org/W4390872297","https://openalex.org/W4390874074","https://openalex.org/W4390874580","https://openalex.org/W4402660110","https://openalex.org/W4402778510"],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1,59],"paper,":[2],"we":[3,38],"propose":[4],"the":[5,64,70,145],"Dynamic":[6,81],"Latent":[7,82],"Frame":[8,83],"Rate":[9,84],"VAE":[10,112,116],"(DLFR-VAE),":[11],"a":[12,80,105,133],"training-free":[13,106],"paradigm":[14],"that":[15,40,86,109,117],"can":[16,118,130],"make":[17],"use":[18],"of":[19],"adaptive":[20],"temporal":[21,46,90],"compression":[22,33],"in":[23],"latent":[24,65],"space.":[25],"While":[26],"existing":[27,139],"video":[28,42,140,146],"generative":[29],"models":[30,142],"apply":[31],"fixed":[32],"rates":[34,97],"via":[35],"pretrained":[36,111],"VAE,":[37],"observe":[39],"real-world":[41],"content":[43,71,101],"exhibits":[44],"substantial":[45],"non-uniformity,":[47],"with":[48,121,138],"high-motion":[49],"segments":[50],"containing":[51],"more":[52],"information":[53],"than":[54],"static":[55],"scenes.":[56],"Based":[57],"on":[58,99],"insight,":[60],"DLFR-VAE":[61,74,129],"dynamically":[62],"adjusts":[63],"frame":[66,96,123],"rate":[67],"according":[68],"to":[69,114],"complexity.":[72],"Specifically,":[73],"comprises":[75],"two":[76],"core":[77],"innovations:":[78],"(1)":[79],"Scheduler":[85],"partitions":[87],"videos":[88],"into":[89],"chunks":[91],"and":[92,103,143],"adaptively":[93],"determines":[94],"optimal":[95],"based":[98],"information-theoretic":[100],"complexity,":[102],"(2)":[104],"adaptation":[107],"mechanism":[108],"transforms":[110],"architectures":[113],"dynamic":[115],"process":[119],"features":[120],"variable":[122],"rates.":[124],"Our":[125],"simple":[126],"but":[127],"effective":[128],"function":[131],"as":[132],"plug-and-play":[134],"module,":[135],"seamlessly":[136],"integrating":[137],"generation":[141,147],"accelerating":[144],"process.":[148]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-25T00:00:00"}
