{"id":"https://openalex.org/W2625027024","doi":"https://doi.org/10.1109/taslp.2017.2716178","title":"Generating Intelligible Audio Speech From Visual Speech","display_name":"Generating Intelligible Audio Speech From Visual Speech","publication_year":2017,"publication_date":"2017-06-15","ids":{"openalex":"https://openalex.org/W2625027024","doi":"https://doi.org/10.1109/taslp.2017.2716178","mag":"2625027024"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2017.2716178","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2017.2716178","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022796258","display_name":"Thomas Le Cornu","orcid":null},"institutions":[{"id":"https://openalex.org/I1118541","display_name":"University of East Anglia","ror":"https://ror.org/026k5mg93","country_code":"GB","type":"education","lineage":["https://openalex.org/I1118541"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Thomas Le Cornu","raw_affiliation_strings":["School of Computing Sciences, University of East Anglia, Norwich, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing Sciences, University of East Anglia, Norwich, U.K","institution_ids":["https://openalex.org/I1118541"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023076395","display_name":"Ben Milner","orcid":null},"institutions":[{"id":"https://openalex.org/I1118541","display_name":"University of East Anglia","ror":"https://ror.org/026k5mg93","country_code":"GB","type":"education","lineage":["https://openalex.org/I1118541"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ben Milner","raw_affiliation_strings":["School of Computing Sciences, University of East Anglia, Norwich, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing Sciences, University of East Anglia, Norwich, U.K","institution_ids":["https://openalex.org/I1118541"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.1748,"has_fulltext":false,"cited_by_count":46,"citation_normalized_percentile":{"value":0.92827897,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"25","issue":"9","first_page":"1751","last_page":"1761"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7932193875312805},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7783287763595581},{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.633851945400238},{"id":"https://openalex.org/keywords/spectral-envelope","display_name":"Spectral envelope","score":0.49461671710014343},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4900928735733032},{"id":"https://openalex.org/keywords/linear-predictive-coding","display_name":"Linear predictive coding","score":0.43484798073768616},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4331642985343933},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4245070517063141},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4238746166229248},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4101860523223877},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38315704464912415}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7932193875312805},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7783287763595581},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.633851945400238},{"id":"https://openalex.org/C54926389","wikidata":"https://www.wikidata.org/wiki/Q7575188","display_name":"Spectral envelope","level":2,"score":0.49461671710014343},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4900928735733032},{"id":"https://openalex.org/C59883199","wikidata":"https://www.wikidata.org/wiki/Q1826438","display_name":"Linear predictive coding","level":3,"score":0.43484798073768616},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4331642985343933},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4245070517063141},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4238746166229248},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4101860523223877},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38315704464912415},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2017.2716178","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2017.2716178","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:ueaeprints.uea.ac.uk:64052","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400384","display_name":"UEA Digital Repository (University of East Anglia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1118541","host_organization_name":"University of East Anglia","host_organization_lineage":["https://openalex.org/I1118541"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.47999998927116394,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G455538984","display_name":null,"funder_award_id":"EP/M014053/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W139257441","https://openalex.org/W142803501","https://openalex.org/W167578630","https://openalex.org/W169882265","https://openalex.org/W1423512047","https://openalex.org/W1536972981","https://openalex.org/W1552314771","https://openalex.org/W1554064406","https://openalex.org/W1869491686","https://openalex.org/W1908325895","https://openalex.org/W1999673299","https://openalex.org/W2005708641","https://openalex.org/W2008120082","https://openalex.org/W2014240681","https://openalex.org/W2015143272","https://openalex.org/W2025515167","https://openalex.org/W2038952578","https://openalex.org/W2039640471","https://openalex.org/W2059761048","https://openalex.org/W2090861223","https://openalex.org/W2096263221","https://openalex.org/W2097998348","https://openalex.org/W2113814270","https://openalex.org/W2116529913","https://openalex.org/W2121486117","https://openalex.org/W2129142580","https://openalex.org/W2131062138","https://openalex.org/W2137400100","https://openalex.org/W2142838865","https://openalex.org/W2143612262","https://openalex.org/W2152826865","https://openalex.org/W2154354834","https://openalex.org/W2157867825","https://openalex.org/W2160815625","https://openalex.org/W2164764235","https://openalex.org/W2168348268","https://openalex.org/W2170942820","https://openalex.org/W2267805933","https://openalex.org/W2276139887","https://openalex.org/W2293634267","https://openalex.org/W2293856338","https://openalex.org/W2295518984","https://openalex.org/W2395578248","https://openalex.org/W2397008871","https://openalex.org/W2516001803","https://openalex.org/W2539331707","https://openalex.org/W2952746495","https://openalex.org/W4285719527","https://openalex.org/W4300458848","https://openalex.org/W6605722544","https://openalex.org/W6605737493","https://openalex.org/W6606709985","https://openalex.org/W6606983177","https://openalex.org/W6632994029","https://openalex.org/W6639968709","https://openalex.org/W6654041629","https://openalex.org/W6674385629","https://openalex.org/W6677131165","https://openalex.org/W6694381704","https://openalex.org/W6696934422","https://openalex.org/W6697389695","https://openalex.org/W6711777497","https://openalex.org/W6712456248","https://openalex.org/W6765296696"],"related_works":["https://openalex.org/W1572861854","https://openalex.org/W2120260542","https://openalex.org/W2113851715","https://openalex.org/W1894575273","https://openalex.org/W2109051065","https://openalex.org/W1583560604","https://openalex.org/W2036980031","https://openalex.org/W2347902650","https://openalex.org/W1963360513","https://openalex.org/W2625027024"],"abstract_inverted_index":{"This":[0,128],"paper":[1],"is":[2,66],"concerned":[3],"with":[4,131],"generating":[5],"intelligible":[6],"audio":[7],"speech":[8,94,126],"from":[9,29,69,116],"a":[10,13,55,93,111,117,132,139,143],"video":[11],"of":[12,114,119,146],"person":[14],"talking.":[15],"Regression":[16],"and":[17,54,79],"classification":[18],"methods":[19,37,74],"are":[20,38,82,85,99],"proposed":[21],"first":[22],"to":[23,41,75,91],"estimate":[24],"static":[25],"spectral":[26],"envelope":[27],"features":[28],"active":[30],"appearance":[31],"model":[32],"visual":[33,71],"features.":[34],"Two":[35],"further":[36],"then":[39],"developed":[40],"incorporate":[42],"temporal":[43],"information":[44,65],"into":[45],"the":[46,70,88,123],"prediction:":[47],"A":[48],"feature-level":[49],"method":[50,57],"using":[51],"multiple":[52],"frames":[53],"model-level":[56],"based":[58],"on":[59,122],"recurrent":[60],"neural":[61],"networks.":[62],"Speech":[63],"excitation":[64],"not":[67],"available":[68],"signal,":[72],"so":[73],"artificially":[76],"generate":[77],"aperiodicity":[78],"fundamental":[80],"frequency":[81],"developed.":[83],"These":[84],"combined":[86],"within":[87],"STRAIGHT":[89],"vocoder":[90],"produce":[92],"signal.":[95],"The":[96],"various":[97],"systems":[98],"optimized":[100],"through":[101],"objective":[102],"tests":[103,108],"before":[104],"applying":[105],"subjective":[106],"intelligibility":[107],"that":[109,136],"determine":[110],"word":[112,144],"accuracy":[113,145],"85%":[115],"set":[118],"human":[120],"listeners":[121],"GRID":[124],"audio-visual":[125],"database.":[127],"compares":[129],"favorably":[130],"previous":[133],"regression-based":[134],"system":[135],"serves":[137],"as":[138],"baseline,":[140],"which":[141],"achieved":[142],"33%.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
