{"id":"https://openalex.org/W4412613010","doi":"https://doi.org/10.1049/ipr2.70155","title":"Speech2Face3D: A Two\u2010Stage Transfer\u2010Learning Framework for Speech\u2010Driven 3D Facial Animation","display_name":"Speech2Face3D: A Two\u2010Stage Transfer\u2010Learning Framework for Speech\u2010Driven 3D Facial Animation","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412613010","doi":"https://doi.org/10.1049/ipr2.70155"},"language":"en","primary_location":{"id":"doi:10.1049/ipr2.70155","is_oa":true,"landing_page_url":"https://doi.org/10.1049/ipr2.70155","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1049/ipr2.70155","source":{"id":"https://openalex.org/S83215360","display_name":"IET Image Processing","issn_l":"1751-9659","issn":["1751-9659","1751-9667"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311714","host_organization_name":"Institution of Engineering and Technology","host_organization_lineage":["https://openalex.org/P4310311714"],"host_organization_lineage_names":["Institution of Engineering and Technology"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IET Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1049/ipr2.70155","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076564759","display_name":"Li-Ming Pang","orcid":"https://orcid.org/0009-0001-4855-9132"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liming Pang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications Beijing China"],"raw_orcid":"https://orcid.org/0009-0001-4855-9132","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications Beijing China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100410706","display_name":"Zhi Zeng","orcid":"https://orcid.org/0000-0003-2150-2088"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhi Zeng","raw_affiliation_strings":["Beijing University of Posts and Telecommunications Beijing China","Institute of Automation Chinese Academy of Sciences Beijing China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications Beijing China","institution_ids":["https://openalex.org/I139759216"]},{"raw_affiliation_string":"Institute of Automation Chinese Academy of Sciences Beijing China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108948179","display_name":"Yahui Li","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yahui Li","raw_affiliation_strings":["Beijing University of Posts and Telecommunications Beijing China"],"raw_orcid":"https://orcid.org/0009-0007-3063-0117","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications Beijing China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050659104","display_name":"Guixuan Zhang","orcid":"https://orcid.org/0000-0002-1072-8279"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guixuan Zhang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications Beijing China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications Beijing China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109291841","display_name":"Shuwu Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuwu Zhang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications Beijing China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications Beijing China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100410706"],"corresponding_institution_ids":["https://openalex.org/I139759216","https://openalex.org/I19820366","https://openalex.org/I4210112150"],"apc_list":{"value":2000,"currency":"EUR","value_usd":2200},"apc_paid":{"value":2000,"currency":"EUR","value_usd":2200},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1420817,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"19","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12301","display_name":"Facial Nerve Paralysis Treatment and Research","score":0.9695000052452087,"subfield":{"id":"https://openalex.org/subfields/2728","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7546583414077759},{"id":"https://openalex.org/keywords/computer-facial-animation","display_name":"Computer facial animation","score":0.6909854412078857},{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.6613590121269226},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5790805816650391},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4673630893230438},{"id":"https://openalex.org/keywords/computer-animation","display_name":"Computer animation","score":0.4621139168739319},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4547809660434723},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.24330344796180725}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7546583414077759},{"id":"https://openalex.org/C138591656","wikidata":"https://www.wikidata.org/wiki/Q5157538","display_name":"Computer facial animation","level":4,"score":0.6909854412078857},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.6613590121269226},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5790805816650391},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4673630893230438},{"id":"https://openalex.org/C69369342","wikidata":"https://www.wikidata.org/wiki/Q1401416","display_name":"Computer animation","level":3,"score":0.4621139168739319},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4547809660434723},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.24330344796180725}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1049/ipr2.70155","is_oa":true,"landing_page_url":"https://doi.org/10.1049/ipr2.70155","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1049/ipr2.70155","source":{"id":"https://openalex.org/S83215360","display_name":"IET Image Processing","issn_l":"1751-9659","issn":["1751-9659","1751-9667"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311714","host_organization_name":"Institution of Engineering and Technology","host_organization_lineage":["https://openalex.org/P4310311714"],"host_organization_lineage_names":["Institution of Engineering and Technology"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IET Image Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1049/ipr2.70155","is_oa":true,"landing_page_url":"https://doi.org/10.1049/ipr2.70155","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1049/ipr2.70155","source":{"id":"https://openalex.org/S83215360","display_name":"IET Image Processing","issn_l":"1751-9659","issn":["1751-9659","1751-9667"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311714","host_organization_name":"Institution of Engineering and Technology","host_organization_lineage":["https://openalex.org/P4310311714"],"host_organization_lineage_names":["Institution of Engineering and Technology"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IET Image Processing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6009529277","display_name":null,"funder_award_id":"2022YFF0902200","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4412613010.pdf"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W2097117768","https://openalex.org/W2154961933","https://openalex.org/W2737658251","https://openalex.org/W2739192055","https://openalex.org/W2769666294","https://openalex.org/W2804619907","https://openalex.org/W2928165649","https://openalex.org/W2954390955","https://openalex.org/W2962793908","https://openalex.org/W2977592368","https://openalex.org/W2981263323","https://openalex.org/W3041133507","https://openalex.org/W3094550259","https://openalex.org/W3099284785","https://openalex.org/W3154411171","https://openalex.org/W3180794345","https://openalex.org/W3196974791","https://openalex.org/W3209059054","https://openalex.org/W4200630629","https://openalex.org/W4226442948","https://openalex.org/W4312048190","https://openalex.org/W4320855021","https://openalex.org/W4322747088","https://openalex.org/W4328028374","https://openalex.org/W4372260443","https://openalex.org/W4380994134","https://openalex.org/W4386076250","https://openalex.org/W4386076313","https://openalex.org/W4387421624","https://openalex.org/W4387421789","https://openalex.org/W4388157164","https://openalex.org/W4389943445","https://openalex.org/W4390534338","https://openalex.org/W4390872742","https://openalex.org/W4390873123","https://openalex.org/W4391709904","https://openalex.org/W4396916344","https://openalex.org/W4400905566","https://openalex.org/W4401023891","https://openalex.org/W4402112200","https://openalex.org/W4402437172","https://openalex.org/W4402753521","https://openalex.org/W4403788878","https://openalex.org/W4405867666"],"related_works":["https://openalex.org/W1544039745","https://openalex.org/W2121378366","https://openalex.org/W2999276620","https://openalex.org/W2532377291","https://openalex.org/W1976926596","https://openalex.org/W2989004599","https://openalex.org/W2535923857","https://openalex.org/W3094080214","https://openalex.org/W2156310872","https://openalex.org/W2356609371"],"abstract_inverted_index":{"ABSTRACT":[0],"High\u2010fidelity,":[1],"speech\u2010driven":[2],"3D":[3,27,69,115,157],"facial":[4,56,158],"animation":[5],"is":[6,16,141],"crucial":[7],"for":[8,147],"immersive":[9],"applications":[10],"and":[11,29,62,107,129,144,155],"virtual":[12],"avatars.":[13],"Nevertheless,":[14],"advancement":[15],"impeded":[17],"by":[18],"two":[19],"principal":[20],"challenges:":[21],"(1)":[22],"a":[23,46,86,95,142],"lack":[24],"of":[25,33,37,76],"high\u2010quality":[26],"data,":[28],"(2)":[30],"inadequate":[31],"modelling":[32],"the":[34,74],"multi\u2010scale":[35],"characteristics":[36],"speech":[38],"signals.":[39],"In":[40],"this":[41],"paper,":[42],"we":[43],"present":[44],"Speech2Face3D,":[45],"novel":[47],"two\u2010stage":[48],"transfer\u2010learning":[49],"framework":[50,146],"that":[51,118],"pretrains":[52],"on":[53,65,113,124,162],"large\u2010scale":[54],"pseudo\u20103D":[55],"data":[57,151],"derived":[58],"from":[59],"2D":[60,79,150],"videos":[61],"subsequently":[63],"finetunes":[64],"smaller":[66],"yet":[67],"high\u2010fidelity":[68],"datasets.":[70],"This":[71],"design":[72],"leverages":[73],"richness":[75],"easily":[77],"accessible":[78],"resources":[80],"while":[81],"mitigating":[82],"reconstruction":[83],"noise":[84],"through":[85],"simple":[87],"temporal":[88,130],"smoothing":[89],"step.":[90],"Our":[91],"approach":[92],"further":[93],"introduces":[94],"Multi\u2010Scale":[96],"Hierarchical":[97],"Audio":[98],"Encoder":[99],"to":[100,152],"capture":[101],"subtle":[102],"phoneme":[103],"transitions,":[104],"mid\u2010range":[105],"prosody,":[106],"longer\u2010range":[108],"emotional":[109],"cues.":[110],"Extensive":[111],"experiments":[112],"public":[114],"benchmarks":[116],"demonstrate":[117],"our":[119],"method":[120],"achieves":[121],"state\u2010of\u2010the\u2010art":[122],"performance":[123],"lip":[125],"synchronization,":[126],"expression":[127],"fidelity,":[128],"coherence":[131],"metrics.":[132],"Qualitative":[133],"user":[134],"evaluations":[135],"validate":[136],"these":[137],"quantitative":[138],"improvements.":[139],"Speech2Face3D":[140],"robust":[143],"scalable":[145],"utilizing":[148],"extensive":[149],"generate":[153],"precise":[154],"realistic":[156],"animations":[159],"only":[160],"based":[161],"speech.":[163]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
