{"id":"https://openalex.org/W4415536361","doi":"https://doi.org/10.1145/3746027.3755081","title":"Individual Content and Motion Dynamics Preserved Pruning for Video Diffusion Models","display_name":"Individual Content and Motion Dynamics Preserved Pruning for Video Diffusion Models","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415536361","doi":"https://doi.org/10.1145/3746027.3755081"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755081","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755081","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101490743","display_name":"Yiming Wu","orcid":"https://orcid.org/0000-0002-9866-669X"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Yiming Wu","raw_affiliation_strings":["The University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064120901","display_name":"Zhenghao Chen","orcid":"https://orcid.org/0000-0003-0155-4462"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhenghao Chen","raw_affiliation_strings":["University of Newcastle, Newcastle, Australia"],"affiliations":[{"raw_affiliation_string":"University of Newcastle, Newcastle, Australia","institution_ids":["https://openalex.org/I78757542"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100331980","display_name":"Huan Wang","orcid":"https://orcid.org/0000-0001-6951-901X"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huan Wang","raw_affiliation_strings":["Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082181536","display_name":"Dong Xu","orcid":"https://orcid.org/0000-0003-2775-9730"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Dong Xu","raw_affiliation_strings":["The University of Hong Kong, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101490743"],"corresponding_institution_ids":["https://openalex.org/I177725633","https://openalex.org/I889458895"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30326564,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"9714","last_page":"9723"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6014999747276306},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5379999876022339},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.535099983215332},{"id":"https://openalex.org/keywords/motion-compensation","display_name":"Motion compensation","score":0.5044999718666077},{"id":"https://openalex.org/keywords/content","display_name":"Content (measure theory)","score":0.4945000112056732},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.4812999963760376},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.4715000092983246},{"id":"https://openalex.org/keywords/video-quality","display_name":"Video quality","score":0.4528000056743622},{"id":"https://openalex.org/keywords/motion-estimation","display_name":"Motion estimation","score":0.4246000051498413}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7763000130653381},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6014999747276306},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5379999876022339},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.535099983215332},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5214999914169312},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5149000287055969},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.5044999718666077},{"id":"https://openalex.org/C2778152352","wikidata":"https://www.wikidata.org/wiki/Q5165061","display_name":"Content (measure theory)","level":2,"score":0.4945000112056732},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.4812999963760376},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.4715000092983246},{"id":"https://openalex.org/C103910844","wikidata":"https://www.wikidata.org/wiki/Q2631256","display_name":"Video quality","level":3,"score":0.4528000056743622},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.4246000051498413},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.40139999985694885},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3813000023365021},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.37560001015663147},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3752000033855438},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.3723999857902527},{"id":"https://openalex.org/C167510206","wikidata":"https://www.wikidata.org/wiki/Q2835824","display_name":"Block-matching algorithm","level":4,"score":0.3646000027656555},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.3578000068664551},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.3562999963760376},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.3321000039577484},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.326200008392334},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.31859999895095825},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C174493125","wikidata":"https://www.wikidata.org/wiki/Q1073461","display_name":"Quarter-pixel motion","level":3,"score":0.2883000075817108},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2806999981403351},{"id":"https://openalex.org/C39394851","wikidata":"https://www.wikidata.org/wiki/Q921594","display_name":"Inter frame","level":4,"score":0.2590999901294708},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755081","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755081","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2294370754","https://openalex.org/W3096831136","https://openalex.org/W4210274484","https://openalex.org/W4239681197","https://openalex.org/W4404520210","https://openalex.org/W6930322006"],"related_works":[],"abstract_inverted_index":{"The":[0],"high":[1],"computational":[2],"cost":[3],"and":[4,32,37,105,146,197,210,224,243],"slow":[5],"inference":[6,174],"time":[7,175],"are":[8,48,66],"major":[9],"obstacles":[10],"to":[11,111,119,132,158],"deploying":[12],"Video":[13,24],"Diffusion":[14,25],"Models":[15],"(VDMs).":[16],"To":[17],"overcome":[18],"this,":[19],"we":[20,41,76,100,123,150,201],"introduce":[21,151],"a":[22,93,152,168],"new":[23],"Model":[26],"Compression":[27],"approach":[28],"using":[29],"individual":[30,70,73],"content":[31,71],"motion":[33,55,161],"dynamics":[34,56,162],"preserved":[35],"pruning":[36],"consistency":[38,135],"loss.":[39],"First,":[40],"empirically":[42],"observe":[43],"that":[44],"deeper":[45,89],"VDM":[46,95,118],"layers":[47,65,83],"crucial":[49],"for":[50,215],"maintaining":[51,177,230],"the":[52,60,81,88,126,134,137,144,160,164,184,216,220,225,231,234],"quality":[53,232],"of":[54,59,87,139,186,233],"(e.g.,":[57,72],"coherence":[58],"entire":[61],"video),":[62],"while":[63,84,176,229],"shallower":[64,82],"more":[67,86],"focused":[68],"on":[69,189,237],"frames).":[74],"Therefore,":[75],"prune":[77],"redundant":[78],"blocks":[79],"from":[80],"preserving":[85],"layers,":[90],"resulting":[91],"in":[92,136],"lightweight":[94],"variant":[96],"called":[97],"VDMini.":[98,120],"Moreover,":[99],"propose":[101],"an":[102,204],"Individual":[103,127],"Content":[104,128,154],"Motion":[106],"Dynamics":[107],"(ICMD)":[108],"Consistency":[109],"Loss":[110,131,157],"gain":[112],"comparable":[113],"generation":[114,193],"performance":[115],"as":[116,167],"larger":[117],"In":[121],"particular,":[122],"first":[124],"use":[125],"Distillation":[129],"(ICD)":[130],"preserve":[133],"features":[138],"each":[140],"generated":[141,165,235],"frame":[142],"between":[143],"teacher":[145],"student":[147],"models.":[148],"Next,":[149],"Multi-frame":[153],"Adversarial":[155],"(MCA)":[156],"enhance":[159],"across":[163],"video":[166,179,192],"whole.":[169],"This":[170],"method":[171,218,222,227],"significantly":[172],"accelerates":[173],"high-quality":[178],"generation.":[180],"Extensive":[181],"experiments":[182],"demonstrate":[183],"effectiveness":[185],"our":[187],"VDMini":[188],"two":[190],"important":[191],"tasks,":[194],"Text-to-Video":[195],"(T2V)":[196],"Image-to-Video":[198],"(I2V),":[199],"where":[200],"respectively":[202],"achieve":[203],"average":[205],"2.5":[206],"\u00d7,":[207,209],"1.4":[208],"1.25":[211],"\u00d7":[212],"speed":[213],"up":[214],"I2V":[217],"SF-V,":[219],"T2V":[221,226],"T2V-Turbo-v2,":[223],"HunyuanVideo,":[228],"videos":[236],"several":[238],"benchmarks":[239],"including":[240],"UCF101,":[241],"VBench-T2V,":[242],"VBench-I2V.":[244]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
