{"id":"https://openalex.org/W3186700381","doi":"https://doi.org/10.1109/tmm.2021.3099900","title":"Speech Driven Talking Face Generation From a Single Image and an Emotion Condition","display_name":"Speech Driven Talking Face Generation From a Single Image and an Emotion Condition","publication_year":2021,"publication_date":"2021-07-26","ids":{"openalex":"https://openalex.org/W3186700381","doi":"https://doi.org/10.1109/tmm.2021.3099900","mag":"3186700381"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2021.3099900","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2021.3099900","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026088950","display_name":"\u015eefik Emre Eskimez","orcid":"https://orcid.org/0000-0001-6259-5925"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sefik Emre Eskimez","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Cognitive Services Research (CSR) team, Microsoft, Bellevue, WA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Cognitive Services Research (CSR) team, Microsoft, Bellevue, WA, USA","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100384568","display_name":"You Zhang","orcid":"https://orcid.org/0000-0002-4649-278X"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"You Zhang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Rochester, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Rochester, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102749436","display_name":"Zhiyao Duan","orcid":"https://orcid.org/0000-0002-8334-9974"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiyao Duan","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Rochester, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Rochester, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5026088950"],"corresponding_institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210108985"],"apc_list":null,"apc_paid":null,"fwci":5.6198,"has_fulltext":false,"cited_by_count":93,"citation_normalized_percentile":{"value":0.97047193,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"24","issue":null,"first_page":"3480","last_page":"3490"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7964816093444824},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.6051390767097473},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5995410084724426},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5969332456588745},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5355852246284485},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5318678617477417},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.4928867816925049},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.45870575308799744},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.42208027839660645},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3375932574272156},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.33547818660736084}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7964816093444824},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.6051390767097473},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5995410084724426},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5969332456588745},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5355852246284485},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5318678617477417},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.4928867816925049},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.45870575308799744},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.42208027839660645},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3375932574272156},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.33547818660736084},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2021.3099900","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2021.3099900","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.41999998688697815,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G2742395312","display_name":null,"funder_award_id":"1741472","funder_id":"https://openalex.org/F4320335353","funder_display_name":"National Science Foundation of Sri Lanka"}],"funders":[{"id":"https://openalex.org/F4320335353","display_name":"National Science Foundation of Sri Lanka","ror":"https://ror.org/010xaa060"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W129316862","https://openalex.org/W1133916940","https://openalex.org/W1493549571","https://openalex.org/W1506209491","https://openalex.org/W1514274094","https://openalex.org/W1686810756","https://openalex.org/W1777859530","https://openalex.org/W1789721553","https://openalex.org/W1901129140","https://openalex.org/W1971474447","https://openalex.org/W1976985738","https://openalex.org/W1999728850","https://openalex.org/W2007495837","https://openalex.org/W2023055982","https://openalex.org/W2030931454","https://openalex.org/W2047908040","https://openalex.org/W2093428321","https://openalex.org/W2110501904","https://openalex.org/W2114336453","https://openalex.org/W2126599445","https://openalex.org/W2133665775","https://openalex.org/W2135292316","https://openalex.org/W2136153767","https://openalex.org/W2143350951","https://openalex.org/W2147222770","https://openalex.org/W2149628368","https://openalex.org/W2150183867","https://openalex.org/W2155494341","https://openalex.org/W2162598851","https://openalex.org/W2164186291","https://openalex.org/W2171198878","https://openalex.org/W2234166055","https://openalex.org/W2583542555","https://openalex.org/W2738406145","https://openalex.org/W2739192055","https://openalex.org/W2753840835","https://openalex.org/W2794810572","https://openalex.org/W2795230330","https://openalex.org/W2883409523","https://openalex.org/W2889985731","https://openalex.org/W2912159753","https://openalex.org/W2913428326","https://openalex.org/W2914217321","https://openalex.org/W2943547402","https://openalex.org/W2944294033","https://openalex.org/W2963081548","https://openalex.org/W2963290645","https://openalex.org/W2963822910","https://openalex.org/W2964095416","https://openalex.org/W2964559396","https://openalex.org/W2979894294","https://openalex.org/W2990235563","https://openalex.org/W2990452356","https://openalex.org/W3004048296","https://openalex.org/W3015814181","https://openalex.org/W3037611961","https://openalex.org/W3094412822","https://openalex.org/W3094524767","https://openalex.org/W3099319035","https://openalex.org/W3104377639","https://openalex.org/W3132207847","https://openalex.org/W3163767300","https://openalex.org/W3165439169","https://openalex.org/W4251591997","https://openalex.org/W4254724182","https://openalex.org/W4295521014","https://openalex.org/W6637373629","https://openalex.org/W6638035981","https://openalex.org/W6639824700","https://openalex.org/W6735913928","https://openalex.org/W6737896281","https://openalex.org/W6751750676","https://openalex.org/W6753277404","https://openalex.org/W6763638270"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3191326035","https://openalex.org/W3125968744","https://openalex.org/W203959209","https://openalex.org/W2110287964","https://openalex.org/W2167701463"],"abstract_inverted_index":{"Visual":[0],"emotion":[1,22,50,78,94,112],"expression":[2,23,79,95],"plays":[3],"an":[4,32],"important":[5],"role":[6],"in":[7,24],"audiovisual":[8,74],"speech":[9,41,63],"communication.":[10],"In":[11],"this":[12,144],"work,":[13],"we":[14,30,108],"propose":[15],"a":[16,40,43,48,56,86,110],"novel":[17],"approach":[18],"to":[19,54,133],"rendering":[20],"visual":[21,77,93,126,135],"speech-driven":[25],"talking":[26,34,57],"face":[27,35,45,58],"generation.":[28],"Specifically,":[29],"design":[31],"end-to-end":[33],"generation":[36],"system":[37,84],"that":[38,81,130],"takes":[39],"utterance,":[42],"single":[44],"image,":[46],"and":[47,64,76,96,125],"categorical":[49],"label":[51],"as":[52],"input":[53],"render":[55],"video":[59,97],"synchronized":[60],"with":[61,119],"the":[62,66,82,101,104,123,134,140],"expressing":[65],"conditioned":[67],"emotion.":[68],"Objective":[69],"evaluation":[70,91],"on":[71,143],"image":[72],"quality,":[73],"synchronization,":[75],"shows":[80],"proposed":[83,105],"outperforms":[85],"state-of-the-art":[87],"baseline":[88],"system.":[89,106],"Subjective":[90],"of":[92,103],"realness":[98],"also":[99],"demonstrates":[100],"superiority":[102],"Furthermore,":[107],"conduct":[109],"human":[111],"recognition":[113],"pilot":[114],"study":[115],"using":[116],"generated":[117],"videos":[118],"mismatched":[120],"emotions":[121],"among":[122],"audio":[124,141],"modalities.":[127],"Results":[128],"show":[129],"humans":[131],"respond":[132],"modality":[136,142],"more":[137],"significantly":[138],"than":[139],"task.":[145]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":32},{"year":2024,"cited_by_count":25},{"year":2023,"cited_by_count":18},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
