{"id":"https://openalex.org/W3015814181","doi":"https://doi.org/10.1109/icassp40776.2020.9054103","title":"End-To-End Generation of Talking Faces from Noisy Speech","display_name":"End-To-End Generation of Talking Faces from Noisy Speech","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015814181","doi":"https://doi.org/10.1109/icassp40776.2020.9054103","mag":"3015814181"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054103","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054103","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026088950","display_name":"\u015eefik Emre Eskimez","orcid":"https://orcid.org/0000-0001-6259-5925"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sefik Emre Eskimez","raw_affiliation_strings":["Electrical and Computer Engineering, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031488938","display_name":"Ross K. Maddox","orcid":"https://orcid.org/0000-0003-2668-0238"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ross K. Maddox","raw_affiliation_strings":["Biomedical Engineering & Neuroscience, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"Biomedical Engineering & Neuroscience, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064805926","display_name":"Chenliang Xu","orcid":"https://orcid.org/0000-0002-2183-822X"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chenliang Xu","raw_affiliation_strings":["Computer Science University of Rochester, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science University of Rochester, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102749436","display_name":"Zhiyao Duan","orcid":"https://orcid.org/0000-0002-8334-9974"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiyao Duan","raw_affiliation_strings":["Electrical and Computer Engineering, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5026088950"],"corresponding_institution_ids":["https://openalex.org/I5388228"],"apc_list":null,"apc_paid":null,"fwci":2.7276,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.90954575,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1948","last_page":"1952"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7512755393981934},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7139115929603577},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5834099054336548},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.493040531873703},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.4809778928756714},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.4741474986076355},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.44607993960380554},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4139925241470337},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3721994161605835},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.28451406955718994},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.28188857436180115},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10815402865409851}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7512755393981934},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7139115929603577},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5834099054336548},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.493040531873703},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.4809778928756714},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.4741474986076355},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.44607993960380554},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4139925241470337},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3721994161605835},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.28451406955718994},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.28188857436180115},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10815402865409851},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054103","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054103","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6700000166893005,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W1976985738","https://openalex.org/W2015143272","https://openalex.org/W2023055982","https://openalex.org/W2032618685","https://openalex.org/W2099471712","https://openalex.org/W2115252128","https://openalex.org/W2133665775","https://openalex.org/W2155494341","https://openalex.org/W2593414223","https://openalex.org/W2594690981","https://openalex.org/W2738406145","https://openalex.org/W2944294033","https://openalex.org/W2963009026","https://openalex.org/W2963081548","https://openalex.org/W2963290645","https://openalex.org/W2964095416","https://openalex.org/W2964559396","https://openalex.org/W2979894294","https://openalex.org/W2980581183","https://openalex.org/W3089669417","https://openalex.org/W4320013936","https://openalex.org/W6639824700","https://openalex.org/W6677618333","https://openalex.org/W6734491695","https://openalex.org/W6750008037","https://openalex.org/W6751750676","https://openalex.org/W6768827828"],"related_works":["https://openalex.org/W2120771489","https://openalex.org/W2051376034","https://openalex.org/W2294333436","https://openalex.org/W2955597484","https://openalex.org/W2653598178","https://openalex.org/W2373767407","https://openalex.org/W3110551121","https://openalex.org/W2747006289","https://openalex.org/W3160071434","https://openalex.org/W2131486661"],"abstract_inverted_index":{"Acoustic":[0],"cues":[1],"are":[2],"not":[3],"the":[4,11,53,76],"only":[5],"component":[6],"in":[7,154],"speech":[8,21,62,110,153],"communication;":[9],"if":[10],"visual":[12],"counterpart":[13],"is":[14,17],"present,":[15],"it":[16],"shown":[18],"to":[19,51,55,74,87,92],"benefit":[20],"comprehension.":[22],"In":[23,65],"this":[24],"work,":[25],"we":[26,67,83],"propose":[27,46],"an":[28],"end-to-end":[29],"(no":[30],"pre-":[31],"or":[32],"post-processing)":[33],"system":[34,99,138,143],"that":[35,136],"can":[36],"generate":[37],"talking":[38],"faces":[39],"from":[40],"arbitrarily":[41],"long":[42],"noisy":[43,109,152],"speech.":[44],"We":[45,96],"a":[47,140,155],"mouth":[48,58,104],"region":[49],"mask":[50],"encourage":[52],"network":[54,71,90],"focus":[56],"on":[57,108],"movements":[59],"rather":[60],"than":[61],"irrelevant":[63],"movements.":[64],"addition,":[66],"use":[68],"generative":[69],"adversarial":[70],"(GAN)":[72],"training":[73,86,148],"improve":[75],"image":[77,101],"quality":[78,102],"and":[79,103,122,145],"mouth-speech":[80],"synchronization.":[81],"Furthermore,":[82],"employ":[84],"noise-resilient":[85,147],"make":[88],"our":[89,98,137,146],"robust":[91],"unseen":[93,116],"non-stationary":[94,117],"noise.":[95],"evaluate":[97],"with":[100,112,128],"shape":[105],"(landmark)":[106],"measures":[107],"utterances":[111],"five":[113],"types":[114],"of":[115,130,158],"noise":[118],"between":[119],"-10":[120],"dB":[121,124,132],"30":[123],"signal-to-noise":[125],"ratio":[126],"(SNR)":[127],"increments":[129],"1":[131],"SNR.":[133,159],"Results":[134],"show":[135],"outperforms":[139],"state-of-the-art":[141],"baseline":[142],"significantly,":[144],"improves":[149],"performance":[150],"for":[151],"wide":[156],"range":[157]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
