{"id":"https://openalex.org/W7140083991","doi":"https://doi.org/10.18653/v1/2026.eacl-long.101","title":"MAViS: A Multi-Agent Framework for Long-Sequence Video Storytelling","display_name":"MAViS: A Multi-Agent Framework for Long-Sequence Video Storytelling","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7140083991","doi":"https://doi.org/10.18653/v1/2026.eacl-long.101"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2026.eacl-long.101","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.101","pdf_url":"https://aclanthology.org/2026.eacl-long.101.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2026.eacl-long.101.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130344434","display_name":"Qian Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130334251","display_name":"Ziqi Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ziqi Huang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032275274","display_name":"Ruoxi Jia","orcid":"https://orcid.org/0000-0001-9662-9556"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruoxi Jia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047876260","display_name":"Paul Debevec","orcid":"https://orcid.org/0000-0001-7381-2323"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paul Debevec","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130365367","display_name":"Ning Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ning Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39293662,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2273","last_page":"2295"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.7817000150680542,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.7817000150680542,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.11029999703168869,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.02419999986886978,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/storytelling","display_name":"Storytelling","score":0.4214000105857849},{"id":"https://openalex.org/keywords/narrative","display_name":"Narrative","score":0.3116999864578247},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.2786000072956085},{"id":"https://openalex.org/keywords/photography","display_name":"Photography","score":0.273499995470047},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.257099986076355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5170999765396118},{"id":"https://openalex.org/C2776538412","wikidata":"https://www.wikidata.org/wiki/Q989963","display_name":"Storytelling","level":3,"score":0.4214000105857849},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.421099990606308},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.3116999864578247},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C119657128","wikidata":"https://www.wikidata.org/wiki/Q11633","display_name":"Photography","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2669999897480011},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2630000114440918},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.257099986076355},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2026.eacl-long.101","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.101","pdf_url":"https://aclanthology.org/2026.eacl-long.101.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2026.eacl-long.101","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.101","pdf_url":"https://aclanthology.org/2026.eacl-long.101.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7140083991.pdf","grobid_xml":"https://content.openalex.org/works/W7140083991.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"recent":[1],"advances,":[2],"long-sequence":[3,33],"video":[4,34,51,107,138],"generation":[5,139],"frameworks":[6],"still":[7],"suffer":[8],"from":[9],"significant":[10],"limitations:":[11],"poor":[12],"assistive":[13,102],"capability,":[14,103],"suboptimal":[15],"visual":[16,104,131],"quality,":[17,105],"and":[18,53,65,91,106,118,133,164],"limited":[19],"expressiveness.To":[20],"mitigate":[21],"these":[22],"limitations,":[23],"we":[24,80],"propose":[25,81],"MAViS,":[26],"an":[27],"end-to-end":[28],"multi-agent":[29],"collaborative":[30],"framework":[31,110,156],"for":[32,136],"storytelling.MAViS":[35],"orchestrates":[36],"specialized":[37],"agents":[38,58],"across":[39],"multiple":[40],"stages,":[41],"including":[42],"script":[43],"writing,":[44],"shot":[45],"designing,":[46],"character":[47],"modeling,":[48],"keyframe":[49],"generation,":[50],"animation,":[52],"audio":[54],"generation.In":[55],"each":[56],"stage,":[57],"operate":[59],"under":[60],"the":[61,68,73,82,147,154],"3E":[62],"Principle-Explore,":[63],"Examine,":[64],"Enhance-to":[66],"ensure":[67],"completeness":[69],"of":[70,76,149],"intermediate":[71],"outputs.Considering":[72],"capability":[74],"limitations":[75],"current":[77],"generative":[78,92,116],"models,":[79],"Script":[83],"Writing":[84],"Guidelines":[85],"to":[86,127],"optimize":[87],"compatibility":[88],"between":[89],"scripts":[90],"tools.Experimental":[93],"results":[94],"demonstrate":[95],"that":[96,157],"MAViS":[97,124,152],"achieves":[98],"state-of-the-art":[99],"performance":[100],"in":[101],"expressiveness.Its":[108],"modular":[109],"further":[111],"enables":[112,125],"scalability":[113],"with":[114,162],"diverse":[115,130],"models":[117],"tools.With":[119],"just":[120],"a":[121],"brief":[122],"prompt,":[123],"users":[126],"rapidly":[128],"explore":[129],"storytelling":[132],"creative":[134],"directions":[135],"sequential":[137],"by":[140],"efficiently":[141],"producing":[142],"high-quality,":[143],"complete":[144],"longsequence":[145],"videos.To":[146],"best":[148],"our":[150],"knowledge,":[151],"is":[153],"only":[155],"provides":[158],"multimodal":[159],"design":[160],"output-videos":[161],"narratives":[163],"background":[165],"music.":[166]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-24T00:00:00"}
