{"id":"https://openalex.org/W4417099856","doi":"https://doi.org/10.1109/iccv51701.2025.01577","title":"Versatile Transition Generation with Image-to-Video Diffusion","display_name":"Versatile Transition Generation with Image-to-Video Diffusion","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4417099856","doi":"https://doi.org/10.1109/iccv51701.2025.01577"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.01577","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01577","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2508.01698","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088457247","display_name":"Zuhao Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Zuhao Yang","raw_affiliation_strings":["Nanyang Technological University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057878085","display_name":"Jiahui Zhang","orcid":"https://orcid.org/0000-0002-6629-7471"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Jiahui Zhang","raw_affiliation_strings":["Nanyang Technological University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090126397","display_name":"Yingchen Yu","orcid":"https://orcid.org/0000-0002-7893-0764"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yingchen Yu","raw_affiliation_strings":["ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023507910","display_name":"Shijian Lu","orcid":"https://orcid.org/0000-0002-6766-2506"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Shijian Lu","raw_affiliation_strings":["Nanyang Technological University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":null,"display_name":"Song Bai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song Bai","raw_affiliation_strings":["ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5088457247"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36998151,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"16981","last_page":"16990"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9253000020980835,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9253000020980835,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.013899999670684338,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.009800000116229057,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.7023000121116638},{"id":"https://openalex.org/keywords/transition","display_name":"Transition (genetics)","score":0.6643999814987183},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5055999755859375},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4936999976634979},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.4325000047683716},{"id":"https://openalex.org/keywords/motion-compensation","display_name":"Motion compensation","score":0.43050000071525574},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4189999997615814},{"id":"https://openalex.org/keywords/smoothness","display_name":"Smoothness","score":0.40880000591278076}],"concepts":[{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.7023000121116638},{"id":"https://openalex.org/C194232998","wikidata":"https://www.wikidata.org/wiki/Q1606712","display_name":"Transition (genetics)","level":3,"score":0.6643999814987183},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6437000036239624},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5055999755859375},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4936999976634979},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4871000051498413},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4442000091075897},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.4325000047683716},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.43050000071525574},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4198000133037567},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4189999997615814},{"id":"https://openalex.org/C102634674","wikidata":"https://www.wikidata.org/wiki/Q868473","display_name":"Smoothness","level":2,"score":0.40880000591278076},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3962000012397766},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.3228999972343445},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.3131999969482422},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.29249998927116394},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.29010000824928284},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2705000042915344},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2678999900817871},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2671999931335449},{"id":"https://openalex.org/C2985684807","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Text generation","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2565000057220459}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.01577","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01577","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2508.01698","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.01698","pdf_url":"https://arxiv.org/pdf/2508.01698","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2508.01698","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.01698","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2508.01698","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.01698","pdf_url":"https://arxiv.org/pdf/2508.01698","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Leveraging":[0],"text,":[1],"images,":[2],"structure":[3],"maps,":[4],"or":[5],"motion":[6,85,101],"trajectories":[7],"as":[8,37,39],"conditional":[9],"guidance,":[10],"diffusion":[11,98],"models":[12,99],"have":[13],"achieved":[14],"great":[15],"success":[16],"in":[17,100],"automated":[18],"and":[19,26,33,60,74,87,103,110,134],"high-quality":[20],"video":[21,35,52,63],"generation.":[22],"However,":[23],"generating":[24],"smooth":[25],"rational":[27],"transition":[28,116,125,130,144],"videos":[29],"given":[30],"the":[31,93],"first":[32],"last":[34],"frames":[36],"well":[38],"descriptive":[40],"text":[41],"prompts":[42],"is":[43],"far":[44],"underexplored.":[45],"We":[46],"present":[47],"VTG,":[48],"a":[49,121],"Versatile":[50],"Transition":[51],"Generation":[53],"framework":[54],"that":[55,69,140],"can":[56],"generate":[57],"smooth,":[58],"high-fidelity,":[59],"semantically":[61],"coherent":[62],"transitions.":[64],"VTG":[65,109,141],"introduces":[66],"interpolation-based":[67],"initialization":[68],"helps":[70],"preserve":[71],"object":[72],"identity":[73],"handle":[75],"abrupt":[76],"content":[77],"changes":[78],"effectively.":[79],"In":[80],"addition,":[81],"it":[82],"incorporates":[83],"dual-directional":[84],"fine-tuning":[86],"representation":[88],"alignment":[89],"regularization":[90],"to":[91],"mitigate":[92],"limitations":[94],"of":[95],"pre-trained":[96],"image-to-video":[97],"smoothness":[102],"generation":[104,126],"fidelity,":[105],"respectively.":[106],"To":[107],"evaluate":[108],"facilitate":[111],"future":[112],"studies":[113],"on":[114],"unified":[115],"generation,":[117],"we":[118],"collected":[119],"TransitBench,":[120],"comprehensive":[122],"benchmark":[123],"for":[124],"covering":[127],"two":[128],"representative":[129],"tasks:":[131],"concept":[132],"blending":[133],"scene":[135],"transition.":[136],"Extensive":[137],"experiments":[138],"show":[139],"achieves":[142],"superior":[143],"performance":[145],"consistently":[146],"across":[147],"all":[148],"four":[149],"tasks.":[150]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
