{"id":"https://openalex.org/W4414359896","doi":"https://doi.org/10.24963/ijcai.2025/474","title":"VideoHumanMIB: Unlocking Appearance Decoupling for Video Human Motion In-betweening","display_name":"VideoHumanMIB: Unlocking Appearance Decoupling for Video Human Motion In-betweening","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414359896","doi":"https://doi.org/10.24963/ijcai.2025/474"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/474","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/474","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102788950","display_name":"Haiwei Xue","orcid":"https://orcid.org/0000-0001-7318-9682"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haiwei Xue","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043901952","display_name":"Zhensong Zhang","orcid":"https://orcid.org/0009-0001-7911-7564"},"institutions":[{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Zhensong Zhang","raw_affiliation_strings":["Huawei Noah's Ark Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab","institution_ids":["https://openalex.org/I4210159102"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100780133","display_name":"Minglei Li","orcid":"https://orcid.org/0000-0001-5432-2855"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Minglei Li","raw_affiliation_strings":["Beijing Ruxiaoyi Intelligent Technology Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Ruxiaoyi Intelligent Technology Co., Ltd","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103105111","display_name":"Zonghong Dai","orcid":"https://orcid.org/0009-0006-7723-4130"},"institutions":[{"id":"https://openalex.org/I4210146277","display_name":"Infotech Soft (United States)","ror":"https://ror.org/04bw2xb41","country_code":"US","type":"company","lineage":["https://openalex.org/I4210146277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zonghong Dai","raw_affiliation_strings":["Beijing JidianQiyuan InfoTech Co. Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing JidianQiyuan InfoTech Co. Ltd","institution_ids":["https://openalex.org/I4210146277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072709871","display_name":"Fei Yu","orcid":"https://orcid.org/0000-0002-1831-859X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fei Yu","raw_affiliation_strings":["Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031311274","display_name":"Fei Ma","orcid":"https://orcid.org/0009-0002-5388-9125"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fei Ma","raw_affiliation_strings":["Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ)","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102869280","display_name":"Zhiyong Wu","orcid":"https://orcid.org/0000-0001-8533-0524"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Wu","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102788950"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.21915488,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4254","last_page":"4262"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9779999852180481,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9779999852180481,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9726999998092651,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.9416000247001648,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/optical-flow","display_name":"Optical flow","score":0.6581000089645386},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.578000009059906},{"id":"https://openalex.org/keywords/motion-interpolation","display_name":"Motion interpolation","score":0.5374000072479248},{"id":"https://openalex.org/keywords/motion-compensation","display_name":"Motion compensation","score":0.5227000117301941},{"id":"https://openalex.org/keywords/motion-estimation","display_name":"Motion estimation","score":0.5074999928474426},{"id":"https://openalex.org/keywords/motion-capture","display_name":"Motion capture","score":0.46000000834465027},{"id":"https://openalex.org/keywords/interpolation","display_name":"Interpolation (computer graphics)","score":0.4275999963283539},{"id":"https://openalex.org/keywords/match-moving","display_name":"Match moving","score":0.40610000491142273},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.39660000801086426}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7802000045776367},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7687000036239624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7584999799728394},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.6581000089645386},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.578000009059906},{"id":"https://openalex.org/C72560505","wikidata":"https://www.wikidata.org/wiki/Q204510","display_name":"Motion interpolation","level":5,"score":0.5374000072479248},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.5227000117301941},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.5074999928474426},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.46000000834465027},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.4275999963283539},{"id":"https://openalex.org/C95020103","wikidata":"https://www.wikidata.org/wiki/Q1813492","display_name":"Match moving","level":3,"score":0.40610000491142273},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.39660000801086426},{"id":"https://openalex.org/C2986578859","wikidata":"https://www.wikidata.org/wiki/Q657632","display_name":"Human motion","level":3,"score":0.3864000141620636},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.36579999327659607},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.35600000619888306},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.3142000138759613},{"id":"https://openalex.org/C124774092","wikidata":"https://www.wikidata.org/wiki/Q6917782","display_name":"Motion field","level":3,"score":0.3084000051021576},{"id":"https://openalex.org/C174493125","wikidata":"https://www.wikidata.org/wiki/Q1073461","display_name":"Quarter-pixel motion","level":3,"score":0.30390000343322754},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.28130000829696655},{"id":"https://openalex.org/C60692881","wikidata":"https://www.wikidata.org/wiki/Q584529","display_name":"Humanoid robot","level":3,"score":0.2766000032424927},{"id":"https://openalex.org/C146159030","wikidata":"https://www.wikidata.org/wiki/Q7625099","display_name":"Structure from motion","level":3,"score":0.2750999927520752},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2745000123977661},{"id":"https://openalex.org/C117090137","wikidata":"https://www.wikidata.org/wiki/Q7927977","display_name":"Video post-processing","level":5,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/474","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/474","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"propose":[1],"VideoHumanMIB,":[2],"a":[3,64,126,144,149,189],"novel":[4],"framework":[5,140],"for":[6,39,192],"Video":[7],"Human":[8],"Motion":[9],"In-betweening":[10],"that":[11,92,164,172],"enables":[12,197],"seamless":[13],"transitions":[14],"between":[15],"different":[16],"motion":[17,78,95,137,150,183,194],"video":[18,33],"clips,":[19],"facilitating":[20],"the":[21,105,135],"generation":[22],"of":[23,112],"longer":[24],"and":[25,55,77,98,148,157,196,200],"more":[26,198],"natural":[27,158,199],"digital":[28,202],"human":[29,50,99,193,203],"videos.":[30],"While":[31],"existing":[32,175],"frame":[34],"interpolation":[35],"methods":[36],"work":[37],"well":[38],"similar":[40],"motions":[41],"in":[42,53,119,125,178],"adjacent":[43],"frames,":[44],"they":[45],"often":[46],"struggle":[47],"with":[48,143,181],"complex":[49],"movements,":[51],"resulting":[52],"artifacts":[54],"unrealistic":[56],"transitions.":[57,160],"To":[58],"address":[59],"these":[60],"challenges,":[61],"we":[62,68,85],"introduce":[63],"two-stage":[65],"approach:":[66],"First,":[67],"design":[69],"an":[70,87],"Appearance":[71],"Reconstruction":[72],"AutoEncoder":[73],"to":[74,107,132,153],"decouple":[75],"appearance":[76],"information,":[79],"extracting":[80],"robust":[81],"appearance-invariant":[82],"features.":[83],"Second,":[84],"develop":[86],"enhanced":[88],"diffusion":[89],"pretrained":[90],"network":[91],"leverages":[93],"both":[94],"optical":[96],"flow":[97,151],"pose":[100],"as":[101],"guidance":[102],"conditions,":[103],"enabling":[104],"model":[106,123],"learn":[108],"comprehensive":[109],"latent":[110,128],"distributions":[111],"possible":[113],"motions.":[114],"Rather":[115],"than":[116],"operating":[117],"directly":[118],"pixel":[120],"space,":[121,129],"our":[122,165],"works":[124],"learned":[127],"allowing":[130],"it":[131],"better":[133],"capture":[134],"underlying":[136],"dynamics.":[138],"The":[139,185],"is":[141],"optimized":[142],"dual-frame":[145],"constraint":[146],"loss":[147,152],"ensure":[154],"temporal":[155],"consistency":[156],"movement":[159],"Extensive":[161],"experiments":[162],"demonstrate":[163],"approach":[166],"generates":[167],"highly":[168],"realistic":[169],"transition":[170],"sequences":[171],"significantly":[173],"outperform":[174],"methods,":[176],"particularly":[177],"challenging":[179],"scenarios":[180],"large":[182],"variations.":[184],"proposed":[186],"VideoHumanMIB":[187],"establishes":[188],"new":[190],"baseline":[191],"synthesis":[195],"controllable":[201],"animation.":[204]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
