{"id":"https://openalex.org/W4417124870","doi":"https://doi.org/10.1145/3757377.3763985","title":"X-Actor: Emotional and Expressive Long-Range Portrait Acting from Audio","display_name":"X-Actor: Emotional and Expressive Long-Range Portrait Acting from Audio","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W4417124870","doi":"https://doi.org/10.1145/3757377.3763985"},"language":null,"primary_location":{"id":"doi:10.1145/3757377.3763985","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757377.3763985","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078876971","display_name":"Chenxu Zhang","orcid":"https://orcid.org/0000-0002-1971-1975"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chenxu Zhang","raw_affiliation_strings":["ByteDance Inc., San Jose, USA"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc., San Jose, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063345044","display_name":"Zenan Li","orcid":"https://orcid.org/0009-0001-7794-5358"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zenan Li","raw_affiliation_strings":["ByteDance Inc., San Jose, USA"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc., San Jose, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100691898","display_name":"Hongyi Xu","orcid":"https://orcid.org/0009-0006-4455-5632"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hongyi Xu","raw_affiliation_strings":["ByteDance Inc., San Jose, USA"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc., San Jose, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102446690","display_name":"You Xie","orcid":"https://orcid.org/0000-0001-7320-6518"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"You Xie","raw_affiliation_strings":["ByteDance Inc., San Jose, USA"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc., San Jose, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070505636","display_name":"Xiaochen Zhao","orcid":"https://orcid.org/0000-0001-8976-7723"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaochen Zhao","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020924907","display_name":"Tianpei Gu","orcid":"https://orcid.org/0000-0003-3173-8895"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tianpei Gu","raw_affiliation_strings":["ByteDance Inc., San Jose, USA"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc., San Jose, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012261844","display_name":"Guoxian Song","orcid":"https://orcid.org/0000-0002-3664-572X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guoxian Song","raw_affiliation_strings":["ByteDance Inc., San Jose, USA"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc., San Jose, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100363100","display_name":"Xin Chen","orcid":"https://orcid.org/0000-0002-9347-1367"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin Chen","raw_affiliation_strings":["ByteDance Inc., San Jose, USA"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc., San Jose, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057566911","display_name":"Chao Liang","orcid":"https://orcid.org/0000-0002-6923-4505"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chao Liang","raw_affiliation_strings":["ByteDance Inc., Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc., Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103386698","display_name":"Jianwen Jiang","orcid":"https://orcid.org/0000-0002-9495-5590"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianwen Jiang","raw_affiliation_strings":["ByteDance Inc., Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc., Hangzhou, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081407963","display_name":"Linjie Luo","orcid":"https://orcid.org/0000-0001-6322-1175"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Linjie Luo","raw_affiliation_strings":["ByteDance Inc., San Jose, USA"],"affiliations":[{"raw_affiliation_string":"ByteDance Inc., San Jose, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5078876971"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2784,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86296561,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.7835000157356262,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.7835000157356262,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.11060000211000443,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.030500000342726707,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/portrait","display_name":"Portrait","score":0.5735999941825867},{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.5526000261306763},{"id":"https://openalex.org/keywords/computer-facial-animation","display_name":"Computer facial animation","score":0.5389000177383423},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.512499988079071},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.44830000400543213},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.4320000112056732},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4253999888896942},{"id":"https://openalex.org/keywords/rhythm","display_name":"Rhythm","score":0.41510000824928284}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6392999887466431},{"id":"https://openalex.org/C162462552","wikidata":"https://www.wikidata.org/wiki/Q134307","display_name":"Portrait","level":2,"score":0.5735999941825867},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.5526000261306763},{"id":"https://openalex.org/C138591656","wikidata":"https://www.wikidata.org/wiki/Q5157538","display_name":"Computer facial animation","level":4,"score":0.5389000177383423},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.512499988079071},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.44830000400543213},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4399999976158142},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.4320000112056732},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.42579999566078186},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4253999888896942},{"id":"https://openalex.org/C135343436","wikidata":"https://www.wikidata.org/wiki/Q170406","display_name":"Rhythm","level":2,"score":0.41510000824928284},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4124999940395355},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.3928999900817871},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.391400009393692},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.38989999890327454},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.36820000410079956},{"id":"https://openalex.org/C69369342","wikidata":"https://www.wikidata.org/wiki/Q1401416","display_name":"Computer animation","level":3,"score":0.3264000117778778},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.32199999690055847},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C38349280","wikidata":"https://www.wikidata.org/wiki/Q1434290","display_name":"Flow (mathematics)","level":2,"score":0.29170000553131104},{"id":"https://openalex.org/C2780226923","wikidata":"https://www.wikidata.org/wiki/Q929848","display_name":"Movement (music)","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.28780001401901245},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2639000117778778},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.2554999887943268}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3757377.3763985","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757377.3763985","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2604379605","https://openalex.org/W2738406145","https://openalex.org/W2963081548","https://openalex.org/W3019952993","https://openalex.org/W3081492798","https://openalex.org/W3180770160","https://openalex.org/W3197199219","https://openalex.org/W4304014863","https://openalex.org/W4310379947","https://openalex.org/W4312301053","https://openalex.org/W4312722235","https://openalex.org/W4312933868","https://openalex.org/W4386072021","https://openalex.org/W4404199654","https://openalex.org/W4409369524","https://openalex.org/W4413147005","https://openalex.org/W4413147016","https://openalex.org/W4413147572","https://openalex.org/W4413822497","https://openalex.org/W4415798523","https://openalex.org/W4415799309","https://openalex.org/W6890119802"],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"X-Actor,":[2],"a":[3,18,67,87,94,110,136],"novel":[4],"audio-driven":[5,170],"portrait":[6,46,172],"animation":[7],"framework":[8],"that":[9,30,52,77,99,151,157],"generates":[10],"lifelike,":[11],"emotionally":[12],"expressive":[13,79],"talking":[14,161],"head":[15,162],"videos":[16],"from":[17,117],"single":[19],"reference":[20],"image":[21],"and":[22,34,58,119,132,164],"an":[23,72],"input":[24],"audio":[25,131],"clip.":[26],"Unlike":[27],"prior":[28],"methods":[29],"emphasize":[31],"lip":[32],"synchronization":[33],"short-range":[35],"visual":[36,118],"fidelity":[37],"in":[38,109,168],"constrained":[39],"speaking":[40],"scenarios,":[41],"X-Actor":[42,152],"enables":[43],"actor-quality,":[44],"long-form":[45],"performance\u2014capturing":[47],"nuanced,":[48],"dynamically":[49],"evolving":[50],"emotions":[51],"flow":[53],"coherently":[54],"with":[55],"the":[56],"rhythm":[57],"content":[59],"of":[60],"speech.":[61],"Central":[62],"to":[63,176],"our":[64,122],"approach":[65],"is":[66],"two-stage":[68],"decoupled":[69,116],"generation":[70],"pipeline:":[71],"audio-conditioned":[73],"autoregressive":[74,123],"diffusion":[75,124],"model":[76,125],"predicts":[78],"yet":[80],"identity-agnostic":[81],"facial":[82,112,133],"motion":[83,113,143],"latent":[84,114],"tokens":[85],"within":[86],"long":[88],"temporal":[89],"context":[90],"window,":[91],"followed":[92],"by":[93],"diffusion-based":[95],"video":[96,105],"synthesis":[97],"module":[98],"translates":[100],"these":[101],"motions":[102],"into":[103],"high-fidelity":[104],"animations.":[106],"By":[107],"operating":[108],"compact":[111],"space":[115],"identity":[120],"cues,":[121],"effectively":[126],"captures":[127],"long-range":[128],"correlations":[129],"between":[130],"dynamics":[134],"through":[135],"diffusion-forcing":[137],"training":[138],"paradigm,":[139],"enabling":[140],"infinite-length":[141],"emotionally-rich":[142],"prediction":[144],"without":[145],"error":[146],"accumulation.":[147],"Extensive":[148],"experiments":[149],"demonstrate":[150],"produces":[153],"compelling,":[154],"cinematic-style":[155],"performances":[156],"go":[158],"beyond":[159],"standard":[160],"animations":[163],"achieves":[165],"state-of-the-art":[166],"results":[167],"long-range,":[169],"emotional":[171],"acting.":[173],"Please":[174],"refer":[175],"https://byteaigc.github.io/X-Actor/":[177],"for":[178],"more":[179],"results.":[180]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-08T00:00:00"}
