{"id":"https://openalex.org/W2980581183","doi":"https://doi.org/10.1109/taslp.2019.2947741","title":"Noise-Resilient Training Method for Face Landmark Generation From Speech","display_name":"Noise-Resilient Training Method for Face Landmark Generation From Speech","publication_year":2019,"publication_date":"2019-10-16","ids":{"openalex":"https://openalex.org/W2980581183","doi":"https://doi.org/10.1109/taslp.2019.2947741","mag":"2980581183"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2019.2947741","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2019.2947741","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026088950","display_name":"\u015eefik Emre Eskimez","orcid":"https://orcid.org/0000-0001-6259-5925"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sefik Emre Eskimez","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Rochester, Rochester, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Rochester, Rochester, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031488938","display_name":"Ross K. Maddox","orcid":"https://orcid.org/0000-0003-2668-0238"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ross K. Maddox","raw_affiliation_strings":["Department of Biomedical Engineering and Department of Neuroscience, University of Rochester, Rochester, USA"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Engineering and Department of Neuroscience, University of Rochester, Rochester, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064805926","display_name":"Chenliang Xu","orcid":"https://orcid.org/0000-0002-2183-822X"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chenliang Xu","raw_affiliation_strings":["Department of Computer Science, University of Rochester, Rochester, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Rochester, Rochester, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102749436","display_name":"Zhiyao Duan","orcid":"https://orcid.org/0000-0002-8334-9974"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiyao Duan","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Rochester, Rochester, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Rochester, Rochester, USA","institution_ids":["https://openalex.org/I5388228"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5026088950"],"corresponding_institution_ids":["https://openalex.org/I5388228"],"apc_list":null,"apc_paid":null,"fwci":1.5328,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.86700808,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"28","issue":null,"first_page":"27","last_page":"38"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9815999865531921,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8048450350761414},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7059580683708191},{"id":"https://openalex.org/keywords/landmark","display_name":"Landmark","score":0.6986845135688782},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.6007825136184692},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5437394976615906},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.4693993628025055},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4294623136520386},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.424360990524292},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.17304366827011108}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8048450350761414},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7059580683708191},{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.6986845135688782},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.6007825136184692},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5437394976615906},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.4693993628025055},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4294623136520386},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.424360990524292},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.17304366827011108},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2019.2947741","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2019.2947741","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.7200000286102295}],"awards":[{"id":"https://openalex.org/G5157521015","display_name":null,"funder_award_id":"1741472","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310246","display_name":"University of Rochester","ror":"https://ror.org/022kthw22"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1419436964","https://openalex.org/W1503933356","https://openalex.org/W1506441995","https://openalex.org/W1522301498","https://openalex.org/W1539720309","https://openalex.org/W1569907127","https://openalex.org/W1976985738","https://openalex.org/W1993583497","https://openalex.org/W2004789217","https://openalex.org/W2015143272","https://openalex.org/W2023055982","https://openalex.org/W2027834570","https://openalex.org/W2032618685","https://openalex.org/W2038952578","https://openalex.org/W2043003570","https://openalex.org/W2073726098","https://openalex.org/W2082308025","https://openalex.org/W2105055531","https://openalex.org/W2113713975","https://openalex.org/W2115252128","https://openalex.org/W2128173845","https://openalex.org/W2152826865","https://openalex.org/W2155494341","https://openalex.org/W2165942021","https://openalex.org/W2266199501","https://openalex.org/W2295661697","https://openalex.org/W2296650210","https://openalex.org/W2398336962","https://openalex.org/W2738406145","https://openalex.org/W2739192055","https://openalex.org/W2745771616","https://openalex.org/W2762899171","https://openalex.org/W2795230330","https://openalex.org/W2896348597","https://openalex.org/W2899771611","https://openalex.org/W2912990735","https://openalex.org/W2949662773","https://openalex.org/W2963009026","https://openalex.org/W2963081548","https://openalex.org/W2963192365","https://openalex.org/W2964121744","https://openalex.org/W2964171275","https://openalex.org/W3104792420","https://openalex.org/W3123318516","https://openalex.org/W4249433378","https://openalex.org/W6628308797","https://openalex.org/W6631190155","https://openalex.org/W6632327317","https://openalex.org/W6676946368","https://openalex.org/W6677618333","https://openalex.org/W6684544547","https://openalex.org/W6697679221","https://openalex.org/W6737896281","https://openalex.org/W6745691803","https://openalex.org/W6750008037","https://openalex.org/W6750169759","https://openalex.org/W6750333878","https://openalex.org/W6756040250"],"related_works":["https://openalex.org/W2056853153","https://openalex.org/W2057559274","https://openalex.org/W2026924879","https://openalex.org/W2005087563","https://openalex.org/W2378111931","https://openalex.org/W2052388267","https://openalex.org/W2950647290","https://openalex.org/W1968481813","https://openalex.org/W2620829895","https://openalex.org/W2132337154"],"abstract_inverted_index":{"Visual":[0],"cues":[1],"such":[2],"as":[3,84,133,238],"lip":[4],"movements,":[5],"when":[6],"available,":[7,30],"play":[8],"an":[9,71,85,134],"important":[10],"role":[11],"in":[12,25,39,70],"speech":[13,43,46,69,150,189],"communication.":[14],"They":[15],"are":[16],"especially":[17],"helpful":[18],"for":[19],"the":[20,81,97,118,122,128,154,164,203,208,217,228,239],"hearing":[21],"impaired":[22],"population":[23],"or":[24],"noisy":[26,188],"environments.":[27],"When":[28],"not":[29],"having":[31],"a":[32,58,76,115,144,179,183,222,232],"system":[33,60,166,204],"to":[34,148,207],"automatically":[35],"generate":[36,63],"talking":[37,65,219],"faces":[38,220],"sync":[40],"with":[41,92,139,191,227],"input":[42,190,229],"would":[44],"enhance":[45],"communication":[47],"and":[48,95,182],"enable":[49],"many":[50],"novel":[51],"applications.":[52],"In":[53],"this":[54],"article,":[55],"we":[56,113,142],"present":[57,114],"new":[59,145],"that":[61,79,120,163,216],"can":[62],"3D":[64],"face":[66,104],"landmarks":[67],"from":[68],"online":[72],"fashion.":[73],"We":[74],"employ":[75],"neural":[77],"network":[78,88],"accepts":[80,127],"raw":[82],"waveform":[83],"input.":[86,136],"The":[87],"contains":[89],"convolutional":[90],"layers":[91],"1D":[93],"kernels":[94],"outputs":[96],"active":[98],"shape":[99],"model":[100,119],"(ASM)":[101],"coefficients":[102,132],"of":[103,117,194,202,236],"landmarks.":[105,241],"To":[106,137],"promote":[107],"smoother":[108],"transitions":[109],"between":[110],"video":[111],"frames,":[112],"variant":[116],"has":[121],"same":[123],"architecture":[124],"but":[125],"also":[126],"previous":[129],"frame's":[130],"ASM":[131],"additional":[135],"cope":[138],"background":[140],"noise,":[141],"propose":[143],"training":[146,210],"method":[147],"incorporate":[149],"enhancement":[151],"ideas":[152],"at":[153],"feature":[155],"level.":[156],"Objective":[157],"evaluations":[158,214],"on":[159,177,187],"landmark":[160],"prediction":[161],"show":[162,198,215],"proposed":[165],"yields":[167],"statistically":[168,199],"significantly":[169,223],"smaller":[170],"errors":[171],"than":[172],"two":[173],"state-of-the-art":[174],"baseline":[175],"methods":[176],"both":[178],"single-speaker":[180],"dataset":[181],"multi-speaker":[184],"dataset.":[185],"Experiments":[186],"five":[192],"types":[193],"non-stationary":[195],"unseen":[196],"noise":[197],"significant":[200],"improvements":[201],"performance":[205],"thanks":[206],"noise-resilient":[209],"method.":[211],"Finally,":[212],"subjective":[213],"generated":[218],"have":[221],"more":[224],"convincing":[225,234],"match":[226],"audio,":[230],"achieving":[231],"similarly":[233],"level":[235],"realism":[237],"ground-truth":[240]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
