{"id":"https://openalex.org/W7155052914","doi":"https://doi.org/10.48550/arxiv.2604.17195","title":"DreamShot: Personalized Storyboard Synthesis with Video Diffusion Prior","display_name":"DreamShot: Personalized Storyboard Synthesis with Video Diffusion Prior","publication_year":2026,"publication_date":"2026-04-19","ids":{"openalex":"https://openalex.org/W7155052914","doi":"https://doi.org/10.48550/arxiv.2604.17195"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.17195","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17195","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.17195","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121663840","display_name":"Junjia Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Junjia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134155808","display_name":"Binbin Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Binbin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134210471","display_name":"Pengxiang Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Pengxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134190419","display_name":"Jiyang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134146154","display_name":"Bin Xia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Bin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134120155","display_name":"Zhao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134200295","display_name":"Yitong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yitong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134141586","display_name":"Liang Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Liang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134137296","display_name":"Guanbin Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Guanbin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.46709999442100525,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.46709999442100525,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.22100000083446503,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.05920000001788139,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/storyboard","display_name":"Storyboard","score":0.9829000234603882},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5878999829292297},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5001999735832214},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.4925999939441681},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.41260001063346863},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.3456999957561493},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.3431999981403351},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.3416000008583069}],"concepts":[{"id":"https://openalex.org/C2777080924","wikidata":"https://www.wikidata.org/wiki/Q334667","display_name":"Storyboard","level":2,"score":0.9829000234603882},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.840499997138977},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5878999829292297},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5134999752044678},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5001999735832214},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.4925999939441681},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.41260001063346863},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.39100000262260437},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3456999957561493},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.3431999981403351},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3416000008583069},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.31060001254081726},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.3073999881744385},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.29339998960494995},{"id":"https://openalex.org/C88626702","wikidata":"https://www.wikidata.org/wiki/Q1128903","display_name":"Continuation","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2856000065803528},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.27799999713897705},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C54525549","wikidata":"https://www.wikidata.org/wiki/Q2553445","display_name":"Weaving","level":2,"score":0.2702000141143799}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.17195","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17195","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.17195","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17195","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Storyboard":[0],"synthesis":[1],"plays":[2],"a":[3,58,125,141,174],"crucial":[4],"role":[5,127,162],"in":[6,104],"visual":[7,181],"storytelling,":[8],"aiming":[9],"to":[10,38,168],"generate":[11],"coherent":[12,113],"shot":[13],"sequences":[14,114],"that":[15,65,130,156],"visually":[16,110],"narrate":[17],"cinematic":[18],"events":[19],"with":[20,115],"consistent":[21,43],"characters,":[22],"scenes,":[23],"and":[24,46,80,94,111,119,136,150,164],"transitions.":[25],"However,":[26],"existing":[27],"approaches":[28],"are":[29],"mostly":[30],"adapted":[31],"from":[32],"text-to-image":[33,170],"diffusion":[34,70],"models,":[35,107,172],"which":[36],"struggle":[37],"maintain":[39],"long-range":[40],"temporal":[41],"coherence,":[42,161],"character":[44,120,133],"identities,":[45],"narrative":[47,117],"flow":[48],"across":[49],"multiple":[50,132],"shots.":[51],"In":[52],"this":[53],"paper,":[54],"we":[55],"introduce":[56],"DreamShot,":[57],"video":[59,69,105,179],"generative":[60,106],"model":[61],"based":[62],"storyboard":[63,96,171],"framework":[64],"fully":[66],"exploits":[67],"powerful":[68],"priors":[71],"for":[72],"controllable":[73,178],"multi-shot":[74],"synthesis.":[75],"DreamShot":[76,108,123,157],"supports":[77],"both":[78],"Text-to-Shot":[79],"Reference-to-Shot":[81],"generation,":[82],"as":[83,85],"well":[84],"story":[86],"continuation":[87],"conditioned":[88],"on":[89],"previous":[90],"frames,":[91],"enabling":[92],"flexible":[93],"context-aware":[95],"generation.":[97],"By":[98],"leveraging":[99],"the":[100],"spatial-temporal":[101],"consistency":[102],"inherent":[103],"produces":[109],"semantically":[112],"improved":[116],"fidelity":[118],"continuity.":[121],"Furthermore,":[122],"incorporates":[124],"multi-reference":[126],"conditioning":[128],"module":[129],"accepts":[131],"reference":[134,149],"images":[135],"enforces":[137],"identity":[138],"alignment":[139],"via":[140],"Role-Attention":[142],"Consistency":[143],"Loss,":[144],"explicitly":[145],"constraining":[146],"attention":[147],"between":[148],"generated":[151],"roles.":[152],"Extensive":[153],"experiments":[154],"demonstrate":[155],"achieves":[158],"superior":[159],"scene":[160],"consistency,":[163],"generation":[165],"efficiency":[166],"compared":[167],"state-of-the-art":[169],"establishing":[173],"new":[175],"direction":[176],"toward":[177],"model-driven":[180],"storytelling.":[182]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-22T00:00:00"}
