{"id":"https://openalex.org/W2808195542","doi":"https://doi.org/10.21437/interspeech.2018-1943","title":"Deep Lip Reading: A Comparison of Models and an Online Application","display_name":"Deep Lip Reading: A Comparison of Models and an Online Application","publication_year":2018,"publication_date":"2018-08-28","ids":{"openalex":"https://openalex.org/W2808195542","doi":"https://doi.org/10.21437/interspeech.2018-1943","mag":"2808195542"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2018-1943","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-1943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1806.06053","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018690028","display_name":"Triantafyllos Afouras","orcid":"https://orcid.org/0000-0002-3935-9681"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Triantafyllos Afouras","raw_affiliation_strings":["University of Oxford, Oxford, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038723822","display_name":"Joon Son Chung","orcid":"https://orcid.org/0000-0001-7741-7275"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Joon Son Chung","raw_affiliation_strings":["University of Oxford, Oxford, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057678172","display_name":"Andrew Zisserman","orcid":"https://orcid.org/0000-0002-8945-8573"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Andrew Zisserman","raw_affiliation_strings":["University of Oxford, Oxford, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I40120149"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5018690028"],"corresponding_institution_ids":["https://openalex.org/I40120149"],"apc_list":null,"apc_paid":null,"fwci":2.3214,"has_fulltext":true,"cited_by_count":17,"citation_normalized_percentile":{"value":0.89354923,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3514","last_page":"3518"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9609000086784363,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9556999802589417,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8201686143875122},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7233390212059021},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.7104721665382385},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6575788259506226},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.598694920539856},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5331900119781494},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5046015977859497},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.501328706741333},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.48337286710739136},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.47665077447891235},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.43701502680778503},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.41765716671943665},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.11500552296638489},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10313895344734192}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8201686143875122},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7233390212059021},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.7104721665382385},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6575788259506226},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.598694920539856},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5331900119781494},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5046015977859497},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.501328706741333},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.48337286710739136},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.47665077447891235},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.43701502680778503},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41765716671943665},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.11500552296638489},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10313895344734192},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.21437/interspeech.2018-1943","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-1943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1806.06053","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1806.06053","pdf_url":"https://arxiv.org/pdf/1806.06053","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2808195542","is_oa":true,"landing_page_url":"https://www.arxiv.org/pdf/1806.06053","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:ora.ox.ac.uk:uuid:0243e187-4559-4007-ac9a-edfc0cebf69c","is_oa":false,"landing_page_url":"https://ora.ox.ac.uk/objects/uuid:0243e187-4559-4007-ac9a-edfc0cebf69c","pdf_url":null,"source":{"id":"https://openalex.org/S4306402636","display_name":"Oxford University Research Archive (ORA) (University of Oxford)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I40120149","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/I40120149"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference item"},{"id":"doi:10.48550/arxiv.1806.06053","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1806.06053","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1806.06053","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1806.06053","pdf_url":"https://arxiv.org/pdf/1806.06053","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8700000047683716}],"awards":[{"id":"https://openalex.org/G1277543710","display_name":null,"funder_award_id":"EP/M013774/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G2018133609","display_name":"Seebibyte: Visual Search for the Era of Big Data","funder_award_id":"EP/M013774/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8416736120","display_name":null,"funder_award_id":"Seebibyte EP/M013774/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320317153","display_name":"DeepMind","ror":"https://ror.org/00971b260"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2808195542.pdf","grobid_xml":"https://content.openalex.org/works/W2808195542.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2964283370","https://openalex.org/W2952746495","https://openalex.org/W2594690981","https://openalex.org/W2194775991","https://openalex.org/W2127141656","https://openalex.org/W2015143272","https://openalex.org/W3011959832","https://openalex.org/W2964121744","https://openalex.org/W2963403868","https://openalex.org/W2891205112","https://openalex.org/W2883383043","https://openalex.org/W2596627958","https://openalex.org/W2578229578","https://openalex.org/W2267805933","https://openalex.org/W2753094133","https://openalex.org/W2964330921","https://openalex.org/W3045372535","https://openalex.org/W2806872492","https://openalex.org/W3095173472","https://openalex.org/W2782397232"],"abstract_inverted_index":{"The":[0,46],"goal":[1],"of":[2,117],"this":[3],"paper":[4],"is":[5,70],"to":[6],"develop":[7,18],"state-of-the-art":[8,80],"models":[9,51],"for":[10,66,111],"lip":[11,115],"reading":[12,116],"--":[13],"visual":[14],"speech":[15],"recognition.":[16],"We":[17],"three":[19],"architectures":[20],"and":[21,25,39,48,60,120],"compare":[22],"their":[23],"accuracy":[24],"training":[26],"times:":[27],"(i)":[28],"a":[29,35,55,71,100],"recurrent":[30,47],"model":[31,65,77,108],"using":[32],"LSTMs;":[33],"(ii)":[34],"fully":[36,49,106],"convolutional":[37,50,107],"model;":[38],"(iii)":[40],"the":[41,68,79,85,105],"recently":[42],"proposed":[43],"transformer":[44,69],"model.":[45,73],"are":[52],"trained":[53],"with":[54,127],"Connectionist":[56],"Temporal":[57],"Classification":[58],"loss":[59],"use":[61],"an":[62],"explicit":[63],"language":[64],"decoding,":[67],"sequence-to-sequence":[72],"Our":[74],"best":[75],"performing":[76],"improves":[78],"word":[81],"error":[82],"rate":[83],"on":[84],"challenging":[86],"BBC-Oxford":[87],"Lip":[88],"Reading":[89],"Sentences":[90],"2":[91],"(LRS2)":[92],"benchmark":[93],"dataset":[94],"by":[95],"over":[96],"20":[97],"percent.":[98],"As":[99],"further":[101],"contribution":[102],"we":[103],"investigate":[104],"when":[109],"used":[110],"online":[112],"(real":[113],"time)":[114],"continuous":[118],"speech,":[119],"show":[121],"that":[122],"it":[123],"achieves":[124],"high":[125],"performance":[126],"low":[128],"latency.":[129]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
