{"id":"https://openalex.org/W4417125393","doi":"https://doi.org/10.1145/3757377.3764009","title":"AniMaker: Multi-Agent Animated Storytelling with MCTS-Driven Clip Generation","display_name":"AniMaker: Multi-Agent Animated Storytelling with MCTS-Driven Clip Generation","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W4417125393","doi":"https://doi.org/10.1145/3757377.3764009"},"language":null,"primary_location":{"id":"doi:10.1145/3757377.3764009","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757377.3764009","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101308411","display_name":"Haoyuan Shi","orcid":"https://orcid.org/0009-0004-3790-0683"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoyuan Shi","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019703861","display_name":"Yunxin Li","orcid":"https://orcid.org/0000-0003-4819-2489"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunxin Li","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114910960","display_name":"Xinyu Chen","orcid":"https://orcid.org/0009-0005-9857-1614"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyu Chen","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088191810","display_name":"Longyue Wang","orcid":"https://orcid.org/0000-0002-9062-6183"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longyue Wang","raw_affiliation_strings":["Alibaba International Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba International Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083079672","display_name":"Baotian Hu","orcid":"https://orcid.org/0000-0001-7490-684X"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baotian Hu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103153167","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0003-4659-1822"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101308411"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":2.3234,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.90427643,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.6207000017166138,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.6207000017166138,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.24719999730587006,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.03269999846816063,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.6942999958992004},{"id":"https://openalex.org/keywords/storytelling","display_name":"Storytelling","score":0.6668000221252441},{"id":"https://openalex.org/keywords/storyboard","display_name":"Storyboard","score":0.6498000025749207},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6001999974250793},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4171000123023987},{"id":"https://openalex.org/keywords/computer-animation","display_name":"Computer animation","score":0.4115999937057495},{"id":"https://openalex.org/keywords/photography","display_name":"Photography","score":0.4074999988079071},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.38989999890327454},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.367000013589859}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8144999742507935},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.6942999958992004},{"id":"https://openalex.org/C2776538412","wikidata":"https://www.wikidata.org/wiki/Q989963","display_name":"Storytelling","level":3,"score":0.6668000221252441},{"id":"https://openalex.org/C2777080924","wikidata":"https://www.wikidata.org/wiki/Q334667","display_name":"Storyboard","level":2,"score":0.6498000025749207},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6001999974250793},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.4668999910354614},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4171000123023987},{"id":"https://openalex.org/C69369342","wikidata":"https://www.wikidata.org/wiki/Q1401416","display_name":"Computer animation","level":3,"score":0.4115999937057495},{"id":"https://openalex.org/C119657128","wikidata":"https://www.wikidata.org/wiki/Q11633","display_name":"Photography","level":2,"score":0.4074999988079071},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.40389999747276306},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.38989999890327454},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.367000013589859},{"id":"https://openalex.org/C2779754051","wikidata":"https://www.wikidata.org/wiki/Q2903135","display_name":"Interactive storytelling","level":4,"score":0.36489999294281006},{"id":"https://openalex.org/C45874996","wikidata":"https://www.wikidata.org/wiki/Q37045","display_name":"Markup language","level":3,"score":0.35089999437332153},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3303999900817871},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3246999979019165},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3147999942302704},{"id":"https://openalex.org/C2778739407","wikidata":"https://www.wikidata.org/wiki/Q165372","display_name":"CLIPS","level":2,"score":0.3068000078201294},{"id":"https://openalex.org/C2780310081","wikidata":"https://www.wikidata.org/wiki/Q1154312","display_name":"Video editing","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C2780126544","wikidata":"https://www.wikidata.org/wiki/Q837900","display_name":"Hypermedia","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2800000011920929},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.2549000084400177},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3757377.3764009","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757377.3764009","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2672035888","display_name":null,"funder_award_id":"62422603","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2982215948","https://openalex.org/W3153469116","https://openalex.org/W3169275056","https://openalex.org/W4200476790","https://openalex.org/W4393148714","https://openalex.org/W4402582789","https://openalex.org/W4402727337","https://openalex.org/W4402727496","https://openalex.org/W4402727736","https://openalex.org/W4402774246","https://openalex.org/W4404965628","https://openalex.org/W4413146174","https://openalex.org/W4413982205","https://openalex.org/W4415795847"],"related_works":[],"abstract_inverted_index":{"Despite":[0],"rapid":[1],"advancements":[2],"in":[3,31,143,170,199],"video":[4,42,112],"generation":[5,43,78],"models,":[6],"generating":[7],"coherent,":[8],"long-form":[9],"storytelling":[10,237],"videos":[11],"that":[12,46,154,210],"span":[13],"multiple":[14],"scenes":[15],"and":[16,34,61,79,87,120,126,168,192,205,222,244],"characters":[17],"remains":[18],"challenging.":[19],"Current":[20],"methods":[21],"often":[22],"rigidly":[23],"convert":[24],"pre-generated":[25],"keyframes":[26],"into":[27],"fixed-length":[28],"clips,":[29],"resulting":[30],"disjointed":[32],"narratives":[33],"pacing":[35],"issues.":[36],"Furthermore,":[37],"the":[38,55,102,108,115,121,157,173,200,230],"inherent":[39],"instability":[40],"of":[41,202,232],"models":[44],"means":[45],"even":[47],"a":[48,71],"single":[49],"low-quality":[50],"clip":[51,77,81,113,198],"can":[52],"significantly":[53,228],"degrade":[54],"entire":[56],"output":[57],"animation\u2019s":[58],"logical":[59],"coherence":[60],"visual":[62],"continuity.":[63],"To":[64],"overcome":[65],"these":[66],"obstacles,":[67],"we":[68],"introduce":[69],"AniMaker,":[70],"multi-agent":[72],"framework":[73,95,175],"enabling":[74],"efficient":[75,147],"multi-candidate":[76,233],"storytelling-aware":[80],"selection,":[82],"thus":[83],"creating":[84],"globally":[85],"consistent":[86],"story-coherent":[88],"animation":[89,180,238],"solely":[90],"from":[91],"text":[92],"input.":[93],"The":[94],"is":[96],"structured":[97],"around":[98],"specialized":[99],"agents,":[100],"including":[101,220],"Director":[103],"Agent":[104,110,117,123],"for":[105,111,118,124,178,246],"storyboard":[106],"generation,":[107,114,234],"Photography":[109,144],"Reviewer":[116,171],"evaluation,":[119,181],"Post-Production":[122],"editing":[125],"voiceover,":[127],"collectively":[128],"realizing":[129],"multi-character,":[130],"multi-scene":[131],"animation.":[132],"Central":[133],"to":[134,160,240],"AniMaker\u2019s":[135],"approach":[136],"are":[137,249],"two":[138],"key":[139],"technical":[140],"components:":[141],"MCTS-Gen":[142],"Agent,":[145,172],"an":[146],"Monte":[148],"Carlo":[149],"Tree":[150],"Search":[151],"(MCTS)-inspired":[152],"strategy":[153],"intelligently":[155],"navigates":[156],"candidate":[158],"space":[159],"generate":[161],"high-potential":[162],"clips":[163],"while":[164,227],"optimizing":[165],"resource":[166],"usage;":[167],"AniEval":[169,225],"first":[174],"specifically":[176],"designed":[177],"multi-shot":[179],"which":[182],"assesses":[183],"critical":[184],"aspects":[185],"such":[186],"as":[187,215],"story-level":[188],"consistency,":[189],"action":[190],"completion,":[191],"animation-specific":[193],"features":[194],"by":[195,217],"considering":[196],"each":[197],"context":[201],"its":[203],"preceding":[204],"succeeding":[206],"clips.":[207],"Experiments":[208],"demonstrate":[209],"AniMaker":[211],"achieves":[212],"superior":[213],"quality":[214],"measured":[216],"popular":[218],"metrics":[219],"VBench":[221],"our":[223],"proposed":[224],"framework,":[226],"improving":[229],"efficiency":[231],"pushing":[235],"AI-generated":[236],"closer":[239],"production":[241],"standards.":[242],"Code":[243],"data":[245],"this":[247],"paper":[248],"at":[250],"https://animaker-dev.github.io/":[251]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-12-08T00:00:00"}
