{"id":"https://openalex.org/W7110053609","doi":"https://doi.org/10.1145/3757377.3763914","title":"High-Fidelity Dynamic Portrait Animation via Direct Preference Optimization and Temporal Motion Modulation","display_name":"High-Fidelity Dynamic Portrait Animation via Direct Preference Optimization and Temporal Motion Modulation","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W7110053609","doi":"https://doi.org/10.1145/3757377.3763914"},"language":null,"primary_location":{"id":"doi:10.1145/3757377.3763914","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757377.3763914","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jiahao Cui","orcid":"https://orcid.org/0009-0004-4263-4842"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiahao Cui","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Baoyou Chen","orcid":"https://orcid.org/0009-0003-5291-0364"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baoyou Chen","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mingwang Xu","orcid":"https://orcid.org/0009-0004-5842-6941"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingwang Xu","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hanlin Shang","orcid":"https://orcid.org/0009-0008-9382-6085"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanlin Shang","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yuxuan Chen","orcid":"https://orcid.org/0009-0003-5680-6345"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxuan Chen","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qinkun Su","orcid":"https://orcid.org/0000-0001-6977-9989"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinkun Su","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zilong Dong","orcid":"https://orcid.org/0000-0002-6833-9102"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zilong Dong","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yao Yao","orcid":"https://orcid.org/0000-0001-9866-4291"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Yao","raw_affiliation_strings":["Nanjing University, SuZhou, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University, SuZhou, China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jingdong Wang","orcid":"https://orcid.org/0000-0002-4888-4445"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingdong Wang","raw_affiliation_strings":["Baidu, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Baidu, Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"last","author":{"id":null,"display_name":"Siyu Zhu","orcid":"https://orcid.org/0009-0007-6852-0888"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siyu Zhu","raw_affiliation_strings":["Fudan University, Shanghai, China and Shanghai Innovative Institute, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China and Shanghai Innovative Institute, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":1.2604,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.85656491,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.5044000148773193,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.5044000148773193,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.21310000121593475,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.09390000253915787,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.7110000252723694},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.6315000057220459},{"id":"https://openalex.org/keywords/motion-capture","display_name":"Motion capture","score":0.5295000076293945},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4788999855518341},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.4537999927997589},{"id":"https://openalex.org/keywords/motion-blur","display_name":"Motion blur","score":0.4399999976158142},{"id":"https://openalex.org/keywords/modulation","display_name":"Modulation (music)","score":0.36070001125335693},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.35260000824928284},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.3301999866962433}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7570000290870667},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.7110000252723694},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.640999972820282},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6385999917984009},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.6315000057220459},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.5295000076293945},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4788999855518341},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.4537999927997589},{"id":"https://openalex.org/C2777708103","wikidata":"https://www.wikidata.org/wiki/Q852589","display_name":"Motion blur","level":3,"score":0.4399999976158142},{"id":"https://openalex.org/C123079801","wikidata":"https://www.wikidata.org/wiki/Q750240","display_name":"Modulation (music)","level":2,"score":0.36070001125335693},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.35260000824928284},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.3301999866962433},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.31049999594688416},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.30090001225471497},{"id":"https://openalex.org/C2777036941","wikidata":"https://www.wikidata.org/wiki/Q6917771","display_name":"Motion analysis","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C69369342","wikidata":"https://www.wikidata.org/wiki/Q1401416","display_name":"Computer animation","level":3,"score":0.2906000018119812},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.28859999775886536},{"id":"https://openalex.org/C138591656","wikidata":"https://www.wikidata.org/wiki/Q5157538","display_name":"Computer facial animation","level":4,"score":0.2786000072956085},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.27799999713897705},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C119666444","wikidata":"https://www.wikidata.org/wiki/Q5977280","display_name":"Temporal resolution","level":2,"score":0.2694000005722046},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.26930001378059387},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2655999958515167},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3757377.3763914","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757377.3763914","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W3081492798","https://openalex.org/W4312349930","https://openalex.org/W4386072021","https://openalex.org/W4386075487","https://openalex.org/W4390873054","https://openalex.org/W4390874575","https://openalex.org/W4402753790","https://openalex.org/W4413145441","https://openalex.org/W4413147005","https://openalex.org/W4415796023","https://openalex.org/W4415799309"],"related_works":[],"abstract_inverted_index":{"Generating":[0],"highly":[1],"dynamic":[2],"and":[3,10,26,73,100,122,127,151],"photorealistic":[4],"portrait":[5,70,124],"animations":[6],"driven":[7],"by":[8,87],"audio":[9],"skeletal":[11],"motion":[12,29,81,89,109,138],"remains":[13],"challenging":[14],"due":[15],"to":[16,62,119],"the":[17,78,105],"need":[18],"for":[19,52,69,153],"precise":[20],"lip":[21],"synchronization,":[22,134],"natural":[23],"facial":[24],"expressions,":[25],"high-fidelity":[27],"body":[28,137],"dynamics.":[30],"We":[31],"propose":[32],"a":[33,56],"human-preference-aligned":[34],"diffusion":[35,125],"framework":[36],"that":[37],"addresses":[38],"these":[39],"challenges":[40],"through":[41,96],"two":[42],"key":[43],"innovations.":[44],"First,":[45],"we":[46],"introduce":[47],"direct":[48],"preference":[49,148],"optimization":[50],"tailored":[51],"human-centric":[53],"animation,":[54],"leveraging":[55],"curated":[57],"dataset":[58],"of":[59,75,107],"human":[60,147],"preferences":[61],"align":[63],"generated":[64],"outputs":[65],"with":[66],"perceptual":[67],"metrics":[68],"motion-video":[71],"alignment":[72],"naturalness":[74],"expression.":[76],"Second,":[77],"proposed":[79,115],"temporal":[80,97],"modulation":[82],"resolves":[83],"spatiotemporal":[84],"resolution":[85],"mismatches":[86],"reshaping":[88],"conditions":[90],"into":[91],"dimensionally":[92],"aligned":[93],"latent":[94],"features":[95],"channel":[98],"redistribution":[99],"proportional":[101],"feature":[102],"expansion,":[103],"preserving":[104],"fidelity":[106],"high-frequency":[108],"details":[110],"in":[111,132,146],"diffusion-based":[112],"synthesis.":[113],"The":[114],"mechanism":[116],"is":[117],"complementary":[118],"existing":[120],"UNet":[121],"DiT-based":[123],"approaches,":[126],"experiments":[128],"demonstrate":[129],"obvious":[130],"improvements":[131],"lip-audio":[133],"expression":[135],"vividness,":[136],"coherence":[139],"over":[140],"baseline":[141],"methods,":[142],"alongside":[143],"notable":[144],"gains":[145],"metrics.":[149],"Code":[150],"data":[152],"this":[154],"paper":[155],"are":[156],"at":[157],"https://github.com/fudan-generative-vision/hallo4.":[158]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-10T02:45:41.426853","created_date":"2025-12-08T00:00:00"}
