{"id":"https://openalex.org/W7138461072","doi":"https://doi.org/10.1609/aaai.v40i9.37638","title":"MoFu: Scale-Aware Modulation and Fourier Fusion for Multi-Subject Video Generation","display_name":"MoFu: Scale-Aware Modulation and Fourier Fusion for Multi-Subject Video Generation","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138461072","doi":"https://doi.org/10.1609/aaai.v40i9.37638"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i9.37638","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i9.37638","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i9.37638","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121599805","display_name":"Run Ling","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Run Ling","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129742446","display_name":"Ke Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ke Cao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129697157","display_name":"Jian Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jian Lu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129740293","display_name":"Ao Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ao Ma","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129721234","display_name":"Haowei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haowei Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129711477","display_name":"Runze He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Runze He","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129659750","display_name":"Changwei Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Changwei Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129695553","display_name":"Rongtao Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rongtao Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129742099","display_name":"Yihua Shao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yihua Shao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129730163","display_name":"Zhanjie Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhanjie Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129664808","display_name":"Peng Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng Wu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129713709","display_name":"Guibing Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guibing Guo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129660255","display_name":"Wei Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Feng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129733212","display_name":"Zheng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129742010","display_name":"Jingjing Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jingjing Lv","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129686525","display_name":"Junjie Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junjie Shen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129751170","display_name":"Ching Law","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ching Law","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129724193","display_name":"Xingwei Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xingwei Wang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":["https://openalex.org/A5121599805"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.85309471,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"9","first_page":"7033","last_page":"7041"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.96670001745224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.96670001745224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.004600000102072954,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0031999999191612005,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/permutation","display_name":"Permutation (music)","score":0.5939000248908997},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5820000171661377},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5478000044822693},{"id":"https://openalex.org/keywords/fourier-transform","display_name":"Fourier transform","score":0.5153999924659729},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.4650000035762787},{"id":"https://openalex.org/keywords/subject","display_name":"Subject (documents)","score":0.4431999921798706},{"id":"https://openalex.org/keywords/modulation","display_name":"Modulation (music)","score":0.4097999930381775},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.38089999556541443}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.76419997215271},{"id":"https://openalex.org/C21308566","wikidata":"https://www.wikidata.org/wiki/Q7169365","display_name":"Permutation (music)","level":2,"score":0.5939000248908997},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5820000171661377},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5478000044822693},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.5153999924659729},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5139999985694885},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4650000035762787},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.4431999921798706},{"id":"https://openalex.org/C123079801","wikidata":"https://www.wikidata.org/wiki/Q750240","display_name":"Modulation (music)","level":2,"score":0.4097999930381775},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.396699994802475},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3828999996185303},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.38089999556541443},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36910000443458557},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.33180001378059387},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2922999858856201},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2849000096321106},{"id":"https://openalex.org/C207864730","wikidata":"https://www.wikidata.org/wiki/Q179467","display_name":"Fourier series","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C11930861","wikidata":"https://www.wikidata.org/wiki/Q181417","display_name":"Frequency modulation","level":3,"score":0.2728999853134155},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C4069607","wikidata":"https://www.wikidata.org/wiki/Q868732","display_name":"Aliasing","level":3,"score":0.26109999418258667},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2605000138282776},{"id":"https://openalex.org/C203024314","wikidata":"https://www.wikidata.org/wiki/Q1365258","display_name":"Fourier analysis","level":3,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i9.37638","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i9.37638","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i9.37638","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i9.37638","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multi-subject":[0],"video":[1],"generation":[2],"aims":[3],"to":[4,38,88,119,131],"synthesize":[5],"videos":[6],"from":[7,82],"textual":[8],"prompts":[9],"and":[10,21,41,85,135,154,172],"multiple":[11],"reference":[12,48,112,155],"images,":[13],"ensuring":[14],"that":[15,62,77,106,160],"each":[16],"subject":[17,35,51,91,152,170],"preserves":[18],"natural":[19,168],"scale":[20,30,67,80,153],"visual":[22,174],"fidelity.":[23],"However,":[24],"current":[25],"methods":[26,165],"face":[27],"two":[28],"challenges:":[29],"inconsistency,":[31,68],"where":[32,44],"variations":[33,150],"in":[34,151,166],"size":[36],"lead":[37],"unnatural":[39],"generation,":[40],"permutation":[42,95],"sensitivity,":[43,96],"the":[45,83,108,115],"order":[46],"of":[47,111],"inputs":[49],"causes":[50],"distortion.":[52],"In":[53],"this":[54],"paper,":[55],"we":[56,69,97,125,143],"propose":[57],"MoFu,":[58],"a":[59,99,121,127,145],"unified":[60,122],"framework":[61],"tackles":[63],"both":[64],"challenges.":[65],"For":[66],"introduce":[70],"Scale-Aware":[71],"Modulation":[72],"(SMO),":[73],"an":[74],"LLM-guided":[75],"module":[76],"extracts":[78],"implicit":[79],"cues":[81],"prompt":[84],"modulates":[86],"features":[87,113],"ensure":[89],"consistent":[90],"sizes.":[92],"To":[93,138],"address":[94],"present":[98],"simple":[100],"yet":[101],"effective":[102],"Fourier":[103,117],"Fusion":[104],"strategy":[105],"processes":[107],"frequency":[109],"information":[110],"via":[114],"Fast":[116],"Transform":[118],"produce":[120],"representation.":[123],"Besides,":[124],"design":[126],"Scale-Permutation":[128],"Stability":[129],"Loss":[130],"jointly":[132],"encourage":[133],"scale-consistent":[134],"permutation-invariant":[136],"generation.":[137],"further":[139],"evaluate":[140],"these":[141],"challenges,":[142],"establish":[144],"dedicated":[146],"benchmark":[147],"with":[148],"controlled":[149],"permutation.":[156],"Extensive":[157],"experiments":[158],"demonstrate":[159],"MoFu":[161],"significantly":[162],"outperforms":[163],"existing":[164],"preserving":[167],"scale,":[169],"fidelity,":[171],"overall":[173],"quality.":[175]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
