{"id":"https://openalex.org/W3172598908","doi":"https://doi.org/10.1109/tmm.2021.3087020","title":"Speaker-Independent Speech Animation Using Perceptual Loss Functions and Synthetic Data","display_name":"Speaker-Independent Speech Animation Using Perceptual Loss Functions and Synthetic Data","publication_year":2021,"publication_date":"2021-06-14","ids":{"openalex":"https://openalex.org/W3172598908","doi":"https://doi.org/10.1109/tmm.2021.3087020","mag":"3172598908"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2021.3087020","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2021.3087020","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067398774","display_name":"Danny Websdale","orcid":"https://orcid.org/0000-0002-6856-772X"},"institutions":[{"id":"https://openalex.org/I1118541","display_name":"University of East Anglia","ror":"https://ror.org/026k5mg93","country_code":"GB","type":"education","lineage":["https://openalex.org/I1118541"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Danny Websdale","raw_affiliation_strings":["School of Computing Sciences, University of East Anglia, Norwich, U.K"],"raw_orcid":"https://orcid.org/0000-0002-6856-772X","affiliations":[{"raw_affiliation_string":"School of Computing Sciences, University of East Anglia, Norwich, U.K","institution_ids":["https://openalex.org/I1118541"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101923852","display_name":"Sarah Taylor","orcid":"https://orcid.org/0000-0003-1299-0446"},"institutions":[{"id":"https://openalex.org/I1118541","display_name":"University of East Anglia","ror":"https://ror.org/026k5mg93","country_code":"GB","type":"education","lineage":["https://openalex.org/I1118541"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sarah Taylor","raw_affiliation_strings":["School of Computing Sciences, University of East Anglia, Norwich, U.K"],"raw_orcid":"https://orcid.org/0000-0003-1299-0446","affiliations":[{"raw_affiliation_string":"School of Computing Sciences, University of East Anglia, Norwich, U.K","institution_ids":["https://openalex.org/I1118541"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023076395","display_name":"Ben Milner","orcid":null},"institutions":[{"id":"https://openalex.org/I1118541","display_name":"University of East Anglia","ror":"https://ror.org/026k5mg93","country_code":"GB","type":"education","lineage":["https://openalex.org/I1118541"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ben Milner","raw_affiliation_strings":["School of Computing Sciences, University of East Anglia, Norwich, U.K"],"raw_orcid":"https://orcid.org/0000-0001-8208-4882","affiliations":[{"raw_affiliation_string":"School of Computing Sciences, University of East Anglia, Norwich, U.K","institution_ids":["https://openalex.org/I1118541"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2353,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.78978376,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"24","issue":null,"first_page":"2539","last_page":"2552"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.855013906955719},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6859843730926514},{"id":"https://openalex.org/keywords/computer-facial-animation","display_name":"Computer facial animation","score":0.6304765343666077},{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.5788657665252686},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5077036619186401},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.4475633502006531},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4403611421585083},{"id":"https://openalex.org/keywords/computer-animation","display_name":"Computer animation","score":0.3698430359363556},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.34282609820365906},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.21944481134414673},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.09627354145050049}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.855013906955719},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6859843730926514},{"id":"https://openalex.org/C138591656","wikidata":"https://www.wikidata.org/wiki/Q5157538","display_name":"Computer facial animation","level":4,"score":0.6304765343666077},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.5788657665252686},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5077036619186401},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.4475633502006531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4403611421585083},{"id":"https://openalex.org/C69369342","wikidata":"https://www.wikidata.org/wiki/Q1401416","display_name":"Computer animation","level":3,"score":0.3698430359363556},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.34282609820365906},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.21944481134414673},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.09627354145050049},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tmm.2021.3087020","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2021.3087020","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},{"id":"pmh:oai:ueaeprints.uea.ac.uk:81010","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400384","display_name":"UEA Digital Repository (University of East Anglia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1118541","host_organization_name":"University of East Anglia","host_organization_lineage":["https://openalex.org/I1118541"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.699999988079071,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G455538984","display_name":null,"funder_award_id":"EP/M014053/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W594764791","https://openalex.org/W1554803342","https://openalex.org/W1569907127","https://openalex.org/W2010284423","https://openalex.org/W2014240681","https://openalex.org/W2029199293","https://openalex.org/W2032618685","https://openalex.org/W2032693983","https://openalex.org/W2064675550","https://openalex.org/W2082308025","https://openalex.org/W2114570173","https://openalex.org/W2120067677","https://openalex.org/W2120654454","https://openalex.org/W2129360799","https://openalex.org/W2131774270","https://openalex.org/W2142487393","https://openalex.org/W2147885303","https://openalex.org/W2150183867","https://openalex.org/W2152826865","https://openalex.org/W2401811665","https://openalex.org/W2402539796","https://openalex.org/W2403714865","https://openalex.org/W2468212864","https://openalex.org/W2511870926","https://openalex.org/W2515372520","https://openalex.org/W2721682741","https://openalex.org/W2737658251","https://openalex.org/W2739192055","https://openalex.org/W2745980686","https://openalex.org/W2752161456","https://openalex.org/W2804619907","https://openalex.org/W2888823109","https://openalex.org/W2895226286","https://openalex.org/W2915977493","https://openalex.org/W2944294033","https://openalex.org/W2954390955","https://openalex.org/W2962824709","https://openalex.org/W2963822910","https://openalex.org/W2968193852","https://openalex.org/W2979894294","https://openalex.org/W2980581183","https://openalex.org/W3120163087","https://openalex.org/W4252434905","https://openalex.org/W6617666375","https://openalex.org/W6639968709","https://openalex.org/W6650605322","https://openalex.org/W6733012293","https://openalex.org/W6765657114"],"related_works":["https://openalex.org/W1544039745","https://openalex.org/W2121378366","https://openalex.org/W2999276620","https://openalex.org/W2535923857","https://openalex.org/W2989004599","https://openalex.org/W1976926596","https://openalex.org/W3094080214","https://openalex.org/W2156310872","https://openalex.org/W2356609371","https://openalex.org/W2532377291"],"abstract_inverted_index":{"We":[0,84],"propose":[1],"a":[2,15,48,86,101,109,126,196],"real-time":[3,71],"speaker-independent":[4],"speech-to-facial":[5],"animation":[6,36,66,165,214],"system":[7],"that":[8,91,172,199,209,223],"predicts":[9],"lip":[10],"and":[11,178,183,195,222],"jaw":[12],"movements":[13],"on":[14],"reference":[16],"face":[17],"for":[18,228],"audio":[19,129],"speech":[20,49,65,77,106,146],"taken":[21],"from":[22,40,108,219],"any":[23],"speaker.":[24],"Our":[25],"approach":[26,211],"is":[27,51,135],"motivated":[28],"by":[29,166],"two":[30,168],"key":[31],"observations;":[32],"1)":[33],"Speakerindependent":[34],"facial":[35,164,213],"can":[37,67],"be":[38,68],"generated":[39],"phoneme":[41,220],"labels,":[42],"but":[43,72],"to":[44,55,119,174,216],"perform":[45],"this":[46,117],"automatically":[47],"recogniser":[50],"needed":[52],"which,":[53],"due":[54],"contextual":[56],"look-ahead,":[57],"introduces":[58],"too":[59],"much":[60],"time":[61],"lag.":[62],"2)":[63],"Audio-driven":[64],"performed":[69],"in":[70],"requires":[73],"large,":[74],"multi-speaker":[75,128],"audio-visual":[76,112],"datasets":[78],"of":[79,95,125,131,161,203],"which":[80,132],"there":[81],"are":[82,186,231],"few.":[83],"adopt":[85],"novel":[87],"threestage":[88],"training":[89],"procedure":[90],"leverages":[92],"the":[93,121,133,149,154,159,162,201],"advantages":[94],"each":[96],"approach.":[97],"First":[98],"we":[99,115,139,157],"train":[100],"<italic":[102,142],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[103,143],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">phoneme</i>":[104],"-to-visual":[105,145],"model":[107,118],"large":[110,127],"single-speaker":[111],"dataset.":[113],"Next,":[114],"use":[116],"generate":[120],"synthetic":[122,150],"visual":[123,151],"component":[124],"dataset":[130],"video":[134],"not":[136],"available.":[137],"Finally,":[138],"learn":[140],"an":[141],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">audio</i>":[144],"mapping":[147],"using":[148,189],"features":[152],"as":[153],"target.":[155],"Furthermore,":[156],"increase":[158],"realism":[160],"predicted":[163],"introducing":[167],"perceptually-based":[169],"loss":[170,184],"functions":[171,185],"aim":[173],"improve":[175],"mouth":[176,204,225],"closures":[177],"openings.":[179],"The":[180],"proposed":[181],"method":[182],"evaluated":[187],"objectively":[188],"mean":[190],"square":[191],"error,":[192],"global":[193],"variance":[194],"new":[197],"metric":[198],"measures":[200],"extent":[202],"opening.":[205],"Subjective":[206],"tests":[207],"show":[208],"our":[210],"produces":[212],"comparable":[215],"those":[217],"produced":[218],"sequences":[221],"improved":[224],"closures,":[226,230],"particularly":[227],"bilabial":[229],"achieved.":[232]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
