{"id":"https://openalex.org/W2766579050","doi":"https://doi.org/10.1109/icassp.2018.8461347","title":"Deep Word Embeddings for Visual Speech Recognition","display_name":"Deep Word Embeddings for Visual Speech Recognition","publication_year":2018,"publication_date":"2018-04-01","ids":{"openalex":"https://openalex.org/W2766579050","doi":"https://doi.org/10.1109/icassp.2018.8461347","mag":"2766579050"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2018.8461347","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8461347","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1710.11201","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061939508","display_name":"Themos Stafylakis","orcid":"https://orcid.org/0000-0002-9227-3588"},"institutions":[{"id":"https://openalex.org/I142263535","display_name":"University of Nottingham","ror":"https://ror.org/01ee9ar58","country_code":"GB","type":"education","lineage":["https://openalex.org/I142263535"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Themos Stafylakis","raw_affiliation_strings":["Computer Vision Laboratory, University of Nottingham, UK"],"affiliations":[{"raw_affiliation_string":"Computer Vision Laboratory, University of Nottingham, UK","institution_ids":["https://openalex.org/I142263535"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024224610","display_name":"Georgios Tzimiropoulos","orcid":"https://orcid.org/0000-0002-1803-5338"},"institutions":[{"id":"https://openalex.org/I142263535","display_name":"University of Nottingham","ror":"https://ror.org/01ee9ar58","country_code":"GB","type":"education","lineage":["https://openalex.org/I142263535"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Georgios Tzimiropoulos","raw_affiliation_strings":["Computer Vision Laboratory, University of Nottingham, UK"],"affiliations":[{"raw_affiliation_string":"Computer Vision Laboratory, University of Nottingham, UK","institution_ids":["https://openalex.org/I142263535"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5061939508"],"corresponding_institution_ids":["https://openalex.org/I142263535"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00594344,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4974","last_page":"4978"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8194931745529175},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7260518670082092},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.659420371055603},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6400173306465149},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5940392017364502},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5426949858665466},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.48617666959762573},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47976288199424744},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.42978036403656006},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.42613479495048523},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4192297160625458},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07928651571273804}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8194931745529175},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7260518670082092},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.659420371055603},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6400173306465149},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5940392017364502},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5426949858665466},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.48617666959762573},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47976288199424744},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.42978036403656006},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.42613479495048523},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4192297160625458},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07928651571273804},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1109/icassp.2018.8461347","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8461347","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.nottingham.ac.uk:51133","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306402013","display_name":"Nottingham ePrints (University of Nottingham)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I142263535","host_organization_name":"University of Nottingham","host_organization_lineage":["https://openalex.org/I142263535"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:arXiv.org:1710.11201","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1710.11201","pdf_url":"https://arxiv.org/pdf/1710.11201","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2766579050","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1710.11201.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:nottingham-repository.worktribe.com:925071","is_oa":true,"landing_page_url":"https://nottingham-repository.worktribe.com/output/925071","pdf_url":null,"source":{"id":"https://openalex.org/S4306402483","display_name":"Repository@Nottingham (University of Nottingham)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I142263535","host_organization_name":"University of Nottingham","host_organization_lineage":["https://openalex.org/I142263535"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"acceptedVersion"},{"id":"doi:10.48550/arxiv.1710.11201","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1710.11201","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1710.11201","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1710.11201","pdf_url":"https://arxiv.org/pdf/1710.11201","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6399999856948853,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W170472577","https://openalex.org/W1522301498","https://openalex.org/W1589137271","https://openalex.org/W2064364374","https://openalex.org/W2121812409","https://openalex.org/W2145287260","https://openalex.org/W2190506272","https://openalex.org/W2267805933","https://openalex.org/W2296681920","https://openalex.org/W2302255633","https://openalex.org/W2325939864","https://openalex.org/W2545177271","https://openalex.org/W2578229578","https://openalex.org/W2580471497","https://openalex.org/W2594690981","https://openalex.org/W2596627958","https://openalex.org/W2597757402","https://openalex.org/W2627092829","https://openalex.org/W2735762732","https://openalex.org/W2747135936","https://openalex.org/W2748488820","https://openalex.org/W2750499125","https://openalex.org/W2891226622","https://openalex.org/W2949117887","https://openalex.org/W2951015274","https://openalex.org/W2953291251","https://openalex.org/W2962988733","https://openalex.org/W2963192365","https://openalex.org/W2963356069","https://openalex.org/W2963528589","https://openalex.org/W6631190155","https://openalex.org/W6638667902","https://openalex.org/W6681342084","https://openalex.org/W6734491695","https://openalex.org/W6765296696","https://openalex.org/W7011482893"],"related_works":["https://openalex.org/W2963658982","https://openalex.org/W2916685081","https://openalex.org/W3037580942","https://openalex.org/W1512898506","https://openalex.org/W2979213336","https://openalex.org/W3102259066","https://openalex.org/W2766849450","https://openalex.org/W2796768192","https://openalex.org/W3099254168","https://openalex.org/W2951327905","https://openalex.org/W3011209123","https://openalex.org/W2953090514","https://openalex.org/W3131709275","https://openalex.org/W2262099980","https://openalex.org/W2799956544","https://openalex.org/W3150635893","https://openalex.org/W3009047643","https://openalex.org/W2772376594","https://openalex.org/W2912984882","https://openalex.org/W2794739275"],"abstract_inverted_index":{"In":[0],"this":[1],"paper":[2],"we":[3],"present":[4],"a":[5,52,56,90],"deep":[6],"learning":[7,123],"architecture":[8,76],"for":[9,13],"extracting":[10],"word":[11,33,82],"embeddings":[12,18,102,119],"visual":[14,135],"speech":[15,136],"recognition.":[16],"The":[17,47,130],"summarize":[19],"the":[20,23,30,66,74,98,101,118,144,151],"information":[21],"of":[22,32,39,51,92,100],"mouth":[24],"region":[25],"that":[26,73,133],"is":[27,49,63,138],"relevant":[28],"to":[29,116],"problem":[31],"recognition,":[34],"while":[35],"suppressing":[36],"other":[37],"types":[38],"variability":[40],"such":[41],"as":[42],"speaker,":[43],"pose":[44],"and":[45,59,62,120],"illumination.":[46],"system":[48],"comprised":[50],"spatiotemporal":[53],"convolutional":[54],"layer,":[55],"Residual":[57],"Network":[58],"bidirectional":[60],"LSTMs":[61],"trained":[64],"on":[65,80,89,125],"Lipreading":[67],"in-the-wild":[68],"database.":[69],"We":[70,95,109],"first":[71],"show":[72],"proposed":[75],"goes":[77],"beyond":[78],"state-of-the-art":[79],"closed-set":[81],"identification,":[83],"by":[84],"attaining":[85],"11.92%":[86],"error":[87],"rate":[88],"vocabulary":[91],"500":[93],"words.":[94],"then":[96],"examine":[97],"capacity":[99],"in":[103,141,150],"modelling":[104],"words":[105,126,146],"unseen":[106,127],"during":[107,128],"training.":[108,129],"deploy":[110],"Probabilistic":[111],"Linear":[112],"Discriminant":[113],"Analysis":[114],"(PLDA)":[115],"model":[117],"perform":[121],"low-shot":[122],"experiments":[124,131],"demonstrate":[132],"word-level":[134],"recognition":[137],"feasible":[139],"even":[140],"cases":[142],"where":[143],"target":[145],"are":[147],"not":[148],"included":[149],"training":[152],"set.":[153]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
