{"id":"https://openalex.org/W4414359272","doi":"https://doi.org/10.24963/ijcai.2025/1120","title":"FancyVideo: Towards Dynamic and Consistent Video Generation via Cross-frame Textual Guidance","display_name":"FancyVideo: Towards Dynamic and Consistent Video Generation via Cross-frame Textual Guidance","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414359272","doi":"https://doi.org/10.24963/ijcai.2025/1120"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/1120","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/1120","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114230981","display_name":"Jiasong Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I2799847335","display_name":"Art Institute of Portland","ror":"https://ror.org/01cb0jg64","country_code":"US","type":"education","lineage":["https://openalex.org/I2799847335","https://openalex.org/I2799969541"]},{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Jiasong Feng","raw_affiliation_strings":["360 AI Research,","Beijing University of Technology"],"affiliations":[{"raw_affiliation_string":"360 AI Research,","institution_ids":["https://openalex.org/I2799847335"]},{"raw_affiliation_string":"Beijing University of Technology","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071003083","display_name":"Ao Ma","orcid":"https://orcid.org/0000-0002-4815-6704"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]},{"id":"https://openalex.org/I2799847335","display_name":"Art Institute of Portland","ror":"https://ror.org/01cb0jg64","country_code":"US","type":"education","lineage":["https://openalex.org/I2799847335","https://openalex.org/I2799969541"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Ao Ma","raw_affiliation_strings":["360 AI Research,","Wuhan University"],"affiliations":[{"raw_affiliation_string":"360 AI Research,","institution_ids":["https://openalex.org/I2799847335"]},{"raw_affiliation_string":"Wuhan University","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010739974","display_name":"Jing Wang","orcid":"https://orcid.org/0000-0003-1866-5011"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]},{"id":"https://openalex.org/I2799847335","display_name":"Art Institute of Portland","ror":"https://ror.org/01cb0jg64","country_code":"US","type":"education","lineage":["https://openalex.org/I2799847335","https://openalex.org/I2799969541"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Jing Wang","raw_affiliation_strings":["360 AI Research,","Sun Yat-sen University"],"affiliations":[{"raw_affiliation_string":"360 AI Research,","institution_ids":["https://openalex.org/I2799847335"]},{"raw_affiliation_string":"Sun Yat-sen University","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101875568","display_name":"Ke Cao","orcid":"https://orcid.org/0009-0000-4421-0817"},"institutions":[{"id":"https://openalex.org/I2799847335","display_name":"Art Institute of Portland","ror":"https://ror.org/01cb0jg64","country_code":"US","type":"education","lineage":["https://openalex.org/I2799847335","https://openalex.org/I2799969541"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Ke Cao","raw_affiliation_strings":["360 AI Research,","University of Science and Technology of China"],"affiliations":[{"raw_affiliation_string":"360 AI Research,","institution_ids":["https://openalex.org/I2799847335"]},{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061370730","display_name":"Zhanjie Zhang","orcid":"https://orcid.org/0000-0003-0191-5715"},"institutions":[{"id":"https://openalex.org/I2799847335","display_name":"Art Institute of Portland","ror":"https://ror.org/01cb0jg64","country_code":"US","type":"education","lineage":["https://openalex.org/I2799847335","https://openalex.org/I2799969541"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Zhanjie Zhang","raw_affiliation_strings":["360 AI Research,","Zhejiang University"],"affiliations":[{"raw_affiliation_string":"360 AI Research,","institution_ids":["https://openalex.org/I2799847335"]},{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5114230981"],"corresponding_institution_ids":["https://openalex.org/I2799847335","https://openalex.org/I37796252"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26674947,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"10081","last_page":"10089"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12720","display_name":"Multimedia Communication and Technology","score":0.95660001039505,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.6733999848365784},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6272000074386597},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5358999967575073},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5074999928474426},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.44859999418258667},{"id":"https://openalex.org/keywords/on-the-fly","display_name":"On the fly","score":0.3562999963760376}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8004999756813049},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.6733999848365784},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6272000074386597},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5389000177383423},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5358999967575073},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5074999928474426},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.44859999418258667},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.3562999963760376},{"id":"https://openalex.org/C2985684807","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Text generation","level":2,"score":0.3379000127315521},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3325999975204468},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2985999882221222},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.28040000796318054},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2572000026702881}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/1120","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/1120","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Synthesizing":[0],"motion-rich":[1],"and":[2,50,91,99,135,147,166,172],"temporally":[3],"consistent":[4,173],"videos":[5,52,198],"remains":[6],"a":[7,184],"challenge":[8],"in":[9,48],"artificial":[10],"intelligence,":[11],"especially":[12],"when":[13],"dealing":[14],"with":[15,53,75],"extended":[16],"durations.":[17],"Existing":[18],"text-to-video":[19],"(T2V)":[20],"models":[21],"commonly":[22],"employ":[23],"spatial":[24],"cross-attention":[25],"for":[26],"text":[27,118],"control,":[28],"equivalently":[29],"guiding":[30],"different":[31],"frame":[32],"generations":[33],"without":[34],"frame-specific":[35,106,112],"textual":[36,107,123,133],"guidance.":[37,108],"Thus,":[38],"the":[39,44,71,76,86,97,128,139,151,163,168,177,195,203],"model's":[40],"capacity":[41],"to":[42,104],"comprehend":[43],"temporal":[45],"logic":[46],"conveyed":[47],"prompts":[49],"generate":[51],"coherent":[54],"motion":[55],"is":[56,217],"restricted.":[57],"To":[58],"tackle":[59],"this":[60],"limitation,":[61],"we":[62],"introduce":[63],"FancyVideo,":[64],"an":[65],"innovative":[66],"video":[67],"generator":[68],"that":[69,176,214],"improves":[70],"existing":[72],"text-control":[73],"mechanism":[74],"well-designed":[77],"Cross-frame":[78],"Textual":[79],"Guidance":[80],"Module":[81],"(CTGM).":[82],"Specifically,":[83],"CTGM":[84],"incorporates":[85],"Temporal":[87,92],"Information":[88],"Injector":[89],"(TII)":[90],"Affinity":[93],"Refiner":[94],"(TAR)":[95],"at":[96],"beginning":[98],"end":[100],"of":[101,153,170,180,197,210],"cross-attention,":[102],"respectively,":[103],"achieve":[105],"Firstly,":[109],"TII":[110],"injects":[111],"information":[113],"from":[114,199],"latent":[115,136],"features":[116,137],"into":[117],"conditions,":[119],"thereby":[120],"obtaining":[121],"cross-frame":[122,132],"conditions.":[124],"Then,":[125],"TAR":[126],"refines":[127],"correlation":[129],"matrix":[130],"between":[131],"conditions":[134],"along":[138],"time":[140],"dimension.":[141],"Extensive":[142],"experiments":[143,211],"comprising":[144],"both":[145],"quantitative":[146],"qualitative":[148],"evaluations":[149],"demonstrate":[150],"effectiveness":[152],"FancyVideo.":[154],"Our":[155],"approach":[156],"achieves":[157],"state-of-the-art":[158],"T2V":[159,178],"generation":[160,196],"results":[161],"on":[162],"EvalCrafter":[164],"benchmark":[165],"facilitates":[167],"synthesis":[169],"dynamic":[171],"videos.":[174],"Note":[175],"process":[179],"FancyVideo":[181],"essentially":[182],"involves":[183],"text-to-image":[185],"step":[186],"followed":[187],"by":[188],"T+I2V.":[189],"This":[190],"means":[191],"it":[192],"also":[193,218],"supports":[194],"user":[200],"images,":[201],"i.e.,":[202],"image-to-video":[204],"(I2V)":[205],"task.":[206],"A":[207],"significant":[208],"number":[209],"have":[212],"shown":[213],"its":[215],"performance":[216],"outstanding.":[219]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
