{"id":"https://openalex.org/W4413156551","doi":"https://doi.org/10.1109/cvpr52734.2025.01973","title":"MIMO: Controllable Character Video Synthesis with Spatial Decomposed Modeling","display_name":"MIMO: Controllable Character Video Synthesis with Spatial Decomposed Modeling","publication_year":2025,"publication_date":"2025-06-10","ids":{"openalex":"https://openalex.org/W4413156551","doi":"https://doi.org/10.1109/cvpr52734.2025.01973"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52734.2025.01973","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.01973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036805135","display_name":"Yifang Men","orcid":"https://orcid.org/0000-0003-2495-2869"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]},{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Yifang Men","raw_affiliation_strings":["Alibaba Group,Tongyi Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group,Tongyi Lab","institution_ids":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036736683","display_name":"Yuan Yao","orcid":"https://orcid.org/0009-0006-6884-8827"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]},{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Yuan Yao","raw_affiliation_strings":["Alibaba Group,Tongyi Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group,Tongyi Lab","institution_ids":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101564882","display_name":"Miaomiao Cui","orcid":"https://orcid.org/0000-0001-5546-3967"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]},{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Miaomiao Cui","raw_affiliation_strings":["Alibaba Group,Tongyi Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group,Tongyi Lab","institution_ids":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085032007","display_name":"Liefeng Bo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]},{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Liefeng Bo","raw_affiliation_strings":["Alibaba Group,Tongyi Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group,Tongyi Lab","institution_ids":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036805135"],"corresponding_institution_ids":["https://openalex.org/I4210095624","https://openalex.org/I45928872"],"apc_list":null,"apc_paid":null,"fwci":8.3787,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.97858478,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"21181","last_page":"21191"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.968999981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9678999781608582,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mimo","display_name":"MIMO","score":0.7528665065765381},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6884242296218872},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.6080400943756104},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.2292693555355072},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1144685447216034}],"concepts":[{"id":"https://openalex.org/C207987634","wikidata":"https://www.wikidata.org/wiki/Q176862","display_name":"MIMO","level":3,"score":0.7528665065765381},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6884242296218872},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.6080400943756104},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.2292693555355072},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1144685447216034},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52734.2025.01973","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.01973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1967554269","https://openalex.org/W2064076387","https://openalex.org/W2133665775","https://openalex.org/W2962785568","https://openalex.org/W2971856312","https://openalex.org/W3035492592","https://openalex.org/W3106672182","https://openalex.org/W3176327543","https://openalex.org/W3202804820","https://openalex.org/W4200150166","https://openalex.org/W4200502498","https://openalex.org/W4312341890","https://openalex.org/W4312453532","https://openalex.org/W4312703683","https://openalex.org/W4312891300","https://openalex.org/W4312925709","https://openalex.org/W4312926441","https://openalex.org/W4312933868","https://openalex.org/W4313136947","https://openalex.org/W4385318467","https://openalex.org/W4386066366","https://openalex.org/W4386071957","https://openalex.org/W4390872458","https://openalex.org/W4390872519","https://openalex.org/W4390873054","https://openalex.org/W4390873135","https://openalex.org/W4390874168","https://openalex.org/W4390874306","https://openalex.org/W4390874335","https://openalex.org/W4390889818","https://openalex.org/W4393148505","https://openalex.org/W4399563927","https://openalex.org/W4399574593","https://openalex.org/W4402704510","https://openalex.org/W4402727180","https://openalex.org/W4402727211","https://openalex.org/W4402727359","https://openalex.org/W4402733569","https://openalex.org/W4402777641","https://openalex.org/W4402816951","https://openalex.org/W4404719810"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4251972423","https://openalex.org/W1503216044","https://openalex.org/W2393609567","https://openalex.org/W2369369044","https://openalex.org/W2354143083","https://openalex.org/W2372906645","https://openalex.org/W4319998713"],"abstract_inverted_index":{"Character":[0],"video":[1,130,141,159,270],"synthesis":[2,205,224,245],"aims":[3],"to":[4,104,108,114,126,131,186],"produce":[5],"realistic":[6,82],"videos":[7,84],"of":[8,39,140,203,209,254],"animatable":[9],"characters":[10,42],"within":[11],"lifelike":[12],"scenes.":[13],"As":[14],"a":[15,44,74,119,239,251],"fundamental":[16],"problem":[17],"in":[18,43,118,173,242,249],"the":[19,128,136,146,158,178,204,232],"computer":[20],"vision":[21],"and":[22,65,91,112,156,170,193,246],"graphics":[23],"community,":[24],"3D":[25,110,138,151,179,260],"works":[26,237],"typically":[27],"require":[28],"multi-view":[29],"captures":[30],"for":[31,60,225],"per-case":[32],"training,":[33],"which":[34,77,197],"severely":[35],"limits":[36],"their":[37],"applicability":[38,113],"modeling":[40,212],"arbitrary":[41,105,257],"short":[45],"time.":[46],"Recent":[47],"2D":[48,129,147],"methods":[49],"break":[50],"this":[51,69],"limitation":[52],"via":[53],"pre-trained":[54],"diffusion":[55],"models,":[56],"but":[57,98],"they":[58],"struggle":[59],"flexible":[61,214],"controls,":[62],"pose":[63],"generality":[64,107],"scene":[66,195,226],"interaction.":[67],"To":[68],"end,":[70],"we":[71,144],"propose":[72],"MIMO,":[73],"novel":[75,109,259],"framework":[76],"can":[78],"not":[79],"only":[80],"synthesize":[81],"character":[83,243,271],"with":[85],"controllable":[86],"attributes":[87],"(i.e.,":[88,165,256],"character,":[89],"motion":[90,191,218],"scene)":[92],"provided":[93],"by":[94,238],"simple":[95],"user":[96,215],"inputs,":[97],"also":[99],"simultaneously":[100],"achieve":[101],"advanced":[102],"scalability":[103],"characters,":[106,258],"motions,":[111,261],"interactive":[115,262],"real-world":[116],"scenes":[117],"unified":[120],"framework.":[121],"The":[122,207],"core":[123],"idea":[124],"is":[125,247],"encode":[127],"compact":[132],"spatial":[133,163,210],"codes,":[134],"considering":[135],"inherent":[137],"nature":[139],"occurrence.":[142],"Concretely,":[143],"lift":[145],"frame":[148],"pixels":[149],"into":[150,161],"using":[152],"monocular":[153],"depth":[154],"estimators,":[155],"decompose":[157],"clip":[160],"three":[162],"components":[164,182],"main":[166],"human,":[167],"underlying":[168],"scene,":[169],"floating":[171],"occlusion)":[172],"hierarchical":[174],"layers":[175],"based":[176],"on":[177],"depth.":[180],"These":[181],"are":[183,198],"further":[184],"encoded":[185],"canonical":[187],"identity":[188],"code,":[189,196],"structured":[190],"code":[192],"full":[194],"utilized":[199],"as":[200,220,222],"control":[201],"signals":[202],"process.":[206],"design":[208],"decomposed":[211],"enables":[213],"control,":[216],"complex":[217],"expression,":[219],"well":[221],"3D-aware":[223],"interactions.":[227],"Experimental":[228],"results":[229],"show":[230],"that":[231],"proposed":[233],"method":[234],"outperforms":[235],"prior":[236],"large":[240],"margin":[241],"animation":[244],"effective":[248],"providing":[250],"high":[252],"degree":[253],"controllability":[255],"scenes),":[263],"thus":[264],"enabling":[265],"brandnew":[266],"editing":[267],"tasks":[268],"(e.g.,":[269],"replacement).":[272]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
