{"id":"https://openalex.org/W4417531483","doi":"https://doi.org/10.48550/arxiv.2512.16920","title":"EasyV2V: A High-quality Instruction-based Video Editing Framework","display_name":"EasyV2V: A High-quality Instruction-based Video Editing Framework","publication_year":2025,"publication_date":"2025-12-18","ids":{"openalex":"https://openalex.org/W4417531483","doi":"https://doi.org/10.48550/arxiv.2512.16920"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2512.16920","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.16920","pdf_url":"https://arxiv.org/pdf/2512.16920","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.16920","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Mai, Jinjie","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mai, Jinjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Chaoyang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Chaoyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Qian, Guocheng Gordon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Guocheng Gordon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Menapace, Willi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Menapace, Willi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tulyakov, Sergey","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tulyakov, Sergey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ghanem, Bernard","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghanem, Bernard","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wonka, Peter","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wonka, Peter","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Mirzaei, Ashkan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mirzaei, Ashkan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.448199987411499,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.448199987411499,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.2953000068664551,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09380000084638596,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2512.16920","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.16920","pdf_url":"https://arxiv.org/pdf/2512.16920","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2512.16920","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.16920","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.16920","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.16920","pdf_url":"https://arxiv.org/pdf/2512.16920","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"image":[1,57],"editing":[2,7,98,149],"has":[3],"advanced":[4],"rapidly,":[5],"video":[6,38,54,76,148],"remains":[8],"less":[9],"explored,":[10],"facing":[11],"challenges":[12],"in":[13],"consistency,":[14],"control,":[15,27,120],"and":[16,26,28,33,65,78,130,145,153],"generalization.":[17],"We":[18],"study":[19],"the":[20,41,88],"design":[21],"space":[22],"of":[23],"data,":[24],"architecture,":[25],"introduce":[29],"\\emph{EasyV2V},":[30],"a":[31,101,116,126],"simple":[32],"effective":[34],"framework":[35],"for":[36,75,107],"instruction-based":[37],"editing.":[39],"On":[40,87],"data":[42],"side,":[43,90],"we":[44,91,121],"compose":[45],"existing":[46],"experts":[47],"with":[48,68,109,138],"fast":[49],"inverses":[50],"to":[51,82,114],"build":[52],"diverse":[53],"pairs,":[55,77],"lift":[56],"edit":[58],"pairs":[59,67],"into":[60],"videos":[61],"via":[62,125],"single-frame":[63],"supervision":[64,81],"pseudo":[66],"shared":[69],"affine":[70],"motion,":[71],"mine":[72],"dense-captioned":[73],"clips":[74],"add":[79],"transition":[80],"teach":[83],"how":[84],"edits":[85],"unfold.":[86],"model":[89],"observe":[92],"that":[93],"pretrained":[94],"text-to-video":[95],"models":[96],"possess":[97],"capability,":[99],"motivating":[100],"simplified":[102],"design.":[103],"Simple":[104],"sequence":[105],"concatenation":[106],"conditioning":[108],"light":[110],"LoRA":[111],"fine-tuning":[112],"suffices":[113],"train":[115],"strong":[117],"model.":[118],"For":[119],"unify":[122],"spatiotemporal":[123],"control":[124],"single":[127],"mask":[128],"mechanism":[129],"support":[131],"optional":[132],"reference":[133],"images.":[134],"Overall,":[135],"EasyV2V":[136],"works":[137],"flexible":[139],"inputs,":[140],"e.g.,":[141],"video+text,":[142],"video+mask+text,":[143],"video+mask+reference+text,":[144],"achieves":[146],"state-of-the-art":[147],"results,":[150],"surpassing":[151],"concurrent":[152],"commercial":[154],"systems.":[155],"Project":[156],"page:":[157],"https://snap-research.github.io/easyv2v/":[158]},"counts_by_year":[],"updated_date":"2026-03-03T08:47:05.690250","created_date":"2025-12-21T00:00:00"}
