{"id":"https://openalex.org/W7128386780","doi":"https://doi.org/10.48550/arxiv.2602.06871","title":"RFDM: Residual Flow Diffusion Model for Efficient Causal Video Editing","display_name":"RFDM: Residual Flow Diffusion Model for Efficient Causal Video Editing","publication_year":2026,"publication_date":"2026-02-06","ids":{"openalex":"https://openalex.org/W7128386780","doi":"https://doi.org/10.48550/arxiv.2602.06871"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.06871","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103806056","display_name":"Mohammadreza Salehi","orcid":"https://orcid.org/0000-0002-9247-9439"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Salehi, Mohammadreza","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061617355","display_name":"Mehdi Noroozi","orcid":"https://orcid.org/0000-0001-6206-8121"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Noroozi, Mehdi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125381684","display_name":"Luca Morreale","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Morreale, Luca","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125418517","display_name":"Ruchika Chavhan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chavhan, Ruchika","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078019808","display_name":"Malcolm Chadwick","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chadwick, Malcolm","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125408225","display_name":"Alberto Gil Couto Pimentel Ramos","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramos, Alberto Gil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103129498","display_name":"Abhinav Mehrotra","orcid":"https://orcid.org/0000-0001-6751-0302"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mehrotra, Abhinav","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.593999981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.593999981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.2653999924659729,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12720","display_name":"Multimedia Communication and Technology","score":0.01769999973475933,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/video-editing","display_name":"Video editing","score":0.7718999981880188},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.5873000025749207},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5630000233650208},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.4867999851703644},{"id":"https://openalex.org/keywords/video-denoising","display_name":"Video denoising","score":0.4796000123023987},{"id":"https://openalex.org/keywords/image-editing","display_name":"Image editing","score":0.46650001406669617},{"id":"https://openalex.org/keywords/video-processing","display_name":"Video processing","score":0.4490000009536743},{"id":"https://openalex.org/keywords/motion-compensation","display_name":"Motion compensation","score":0.41999998688697815},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4187999963760376}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8371000289916992},{"id":"https://openalex.org/C2780310081","wikidata":"https://www.wikidata.org/wiki/Q1154312","display_name":"Video editing","level":2,"score":0.7718999981880188},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.5873000025749207},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5630000233650208},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.510699987411499},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5023999810218811},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.4867999851703644},{"id":"https://openalex.org/C30814859","wikidata":"https://www.wikidata.org/wiki/Q4119603","display_name":"Video denoising","level":5,"score":0.4796000123023987},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.46650001406669617},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.4490000009536743},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.41999998688697815},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4187999963760376},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.415800005197525},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.3840999901294708},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.383899986743927},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.38190001249313354},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.37950000166893005},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.34389999508857727},{"id":"https://openalex.org/C87829876","wikidata":"https://www.wikidata.org/wiki/Q648877","display_name":"Post-production","level":2,"score":0.3409000039100647},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3393000066280365},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.31949999928474426},{"id":"https://openalex.org/C106030495","wikidata":"https://www.wikidata.org/wiki/Q1797012","display_name":"Video compression picture types","level":4,"score":0.30410000681877136},{"id":"https://openalex.org/C204641915","wikidata":"https://www.wikidata.org/wiki/Q7315509","display_name":"Residual frame","level":4,"score":0.2957000136375427},{"id":"https://openalex.org/C38349280","wikidata":"https://www.wikidata.org/wiki/Q1434290","display_name":"Flow (mathematics)","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C172849965","wikidata":"https://www.wikidata.org/wiki/Q3148875","display_name":"Reference frame","level":3,"score":0.27079999446868896},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2533000111579895},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2533000111579895},{"id":"https://openalex.org/C151211776","wikidata":"https://www.wikidata.org/wiki/Q2778015","display_name":"Video capture","level":3,"score":0.25270000100135803},{"id":"https://openalex.org/C2778598663","wikidata":"https://www.wikidata.org/wiki/Q1407599","display_name":"Video content analysis","level":4,"score":0.2506999969482422}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.06871","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.06871","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.06871","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.06871","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4884369671344757,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Instructional":[0],"video":[1,8,31,41,48,155,188],"editing":[2,49,75,150],"applies":[3],"edits":[4,52],"to":[5,72,108],"an":[6],"input":[7,187],"using":[9],"only":[10],"text":[11],"prompts,":[12],"enabling":[13],"intuitive":[14],"natural-language":[15],"control.":[16],"Despite":[17],"rapid":[18],"progress,":[19],"most":[20],"methods":[21,148,167],"still":[22],"require":[23],"fixed-length":[24],"inputs":[25],"and":[26,69,116,161,168,183],"substantial":[27],"compute.":[28],"Meanwhile,":[29],"autoregressive":[30],"generation":[32],"enables":[33],"efficient":[34,47],"variable-length":[35,53],"synthesis,":[36],"yet":[37],"remains":[38],"under-explored":[39],"for":[40,149,157],"editing.":[42],"We":[43,120],"introduce":[44],"a":[45,63,97,141],"causal,":[46],"model":[50,68,107],"that":[51,104,144],"videos":[54],"frame":[55],"by":[56,76],"frame.":[57],"For":[58],"efficiency,":[59],"we":[60,95,139],"start":[61],"from":[62],"2D":[64],"image-to-image":[65],"(I2I)":[66],"diffusion":[67,100],"adapt":[70],"it":[71],"video-to-video":[73],"(V2V)":[74],"conditioning":[77],"the":[78,85,106,110,113,117,130,178],"edit":[79],"at":[80,88],"time":[81],"step":[82],"t":[83],"on":[84,133,153],"model's":[86],"prediction":[87],"t-1.":[89],"To":[90],"leverage":[91],"videos'":[92],"temporal":[93],"redundancy,":[94],"propose":[96,140],"new":[98,142],"I2I":[99],"forward":[101],"process":[102,132],"formulation":[103],"encourages":[105],"predict":[109],"residual":[111],"between":[112,135],"target":[114],"output":[115],"previous":[118],"prediction.":[119],"call":[121],"this":[122],"Residual":[123],"Flow":[124],"Diffusion":[125],"Model":[126],"(RFDM),":[127],"which":[128],"focuses":[129],"denoising":[131],"changes":[134],"consecutive":[136],"frames.":[137],"Moreover,":[138],"benchmark":[143],"better":[145],"ranks":[146],"state-of-the-art":[147],"tasks.":[151],"Trained":[152],"paired":[154],"data":[156],"global/local":[158],"style":[159],"transfer":[160],"object":[162],"removal,":[163],"RFDM":[164],"surpasses":[165],"I2I-based":[166],"competes":[169],"with":[170],"fully":[171],"spatiotemporal":[172],"(3D)":[173],"V2V":[174],"models,":[175],"while":[176],"matching":[177],"compute":[179],"of":[180,186],"image":[181],"models":[182],"scaling":[184],"independently":[185],"length.":[189],"More":[190],"content":[191],"can":[192],"be":[193],"found":[194],"in:":[195],"https://smsd75.github.io/RFDM_page/":[196]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-10T00:00:00"}
