{"id":"https://openalex.org/W4378574822","doi":"https://doi.org/10.3390/a16060269","title":"Efficient DNN Model for Word Lip-Reading","display_name":"Efficient DNN Model for Word Lip-Reading","publication_year":2023,"publication_date":"2023-05-27","ids":{"openalex":"https://openalex.org/W4378574822","doi":"https://doi.org/10.3390/a16060269"},"language":"en","primary_location":{"id":"doi:10.3390/a16060269","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a16060269","pdf_url":"https://www.mdpi.com/1999-4893/16/6/269/pdf?version=1685344003","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/16/6/269/pdf?version=1685344003","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082175861","display_name":"Taiki Arakane","orcid":null},"institutions":[{"id":"https://openalex.org/I207014233","display_name":"Kyushu Institute of Technology","ror":"https://ror.org/02278tr80","country_code":"JP","type":"education","lineage":["https://openalex.org/I207014233"]},{"id":"https://openalex.org/I4210143983","display_name":"Kyushu Art Institute of Technology","ror":"https://ror.org/03t4t2e74","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210143983"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Taiki Arakane","raw_affiliation_strings":["Department of Artificial Intelligence, Kyushu Institute of Technology, Fukuoka 820-8502, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, Kyushu Institute of Technology, Fukuoka 820-8502, Japan","institution_ids":["https://openalex.org/I4210143983","https://openalex.org/I207014233"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058108485","display_name":"Takeshi Saitoh","orcid":"https://orcid.org/0000-0001-8844-9707"},"institutions":[{"id":"https://openalex.org/I207014233","display_name":"Kyushu Institute of Technology","ror":"https://ror.org/02278tr80","country_code":"JP","type":"education","lineage":["https://openalex.org/I207014233"]},{"id":"https://openalex.org/I4210143983","display_name":"Kyushu Art Institute of Technology","ror":"https://ror.org/03t4t2e74","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210143983"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takeshi Saitoh","raw_affiliation_strings":["Department of Artificial Intelligence, Kyushu Institute of Technology, Fukuoka 820-8502, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, Kyushu Institute of Technology, Fukuoka 820-8502, Japan","institution_ids":["https://openalex.org/I4210143983","https://openalex.org/I207014233"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5058108485"],"corresponding_institution_ids":["https://openalex.org/I207014233","https://openalex.org/I4210143983"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":4.0903,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.95072643,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"16","issue":"6","first_page":"269","last_page":"269"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7697805166244507},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6308454871177673},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5944675803184509},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5943889617919922},{"id":"https://openalex.org/keywords/reading","display_name":"Reading (process)","score":0.5560559034347534},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5069354176521301},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.49688318371772766},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4910397529602051},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.46784719824790955},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4604073464870453},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4529321789741516},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3564215302467346},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35267671942710876},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10046163201332092},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.08176463842391968}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7697805166244507},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6308454871177673},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5944675803184509},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5943889617919922},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.5560559034347534},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5069354176521301},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.49688318371772766},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4910397529602051},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.46784719824790955},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4604073464870453},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4529321789741516},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3564215302467346},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35267671942710876},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10046163201332092},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.08176463842391968},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/a16060269","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a16060269","pdf_url":"https://www.mdpi.com/1999-4893/16/6/269/pdf?version=1685344003","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:e07e09f50ccf43f0a8fb727864cb1098","is_oa":true,"landing_page_url":"https://doaj.org/article/e07e09f50ccf43f0a8fb727864cb1098","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 16, Iss 6, p 269 (2023)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/1999-4893/16/6/269/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/a16060269","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms; Volume 16; Issue 6; Pages: 269","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/a16060269","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a16060269","pdf_url":"https://www.mdpi.com/1999-4893/16/6/269/pdf?version=1685344003","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8799999952316284,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1069223013","display_name":null,"funder_award_id":"JSPS KAKENHI","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G3459562248","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4227499671","display_name":null,"funder_award_id":"KAKENHI Grant","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4636223006","display_name":null,"funder_award_id":"JSPS KAK","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G5044791890","display_name":"An empirical study of communication support using facial image processing considering the progressiveness of patients with intractable neurological diseases","funder_award_id":"19KT0029","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4378574822.pdf"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W2084272571","https://openalex.org/W2087681821","https://openalex.org/W2136155248","https://openalex.org/W2148659689","https://openalex.org/W2164598857","https://openalex.org/W2183341477","https://openalex.org/W2551572271","https://openalex.org/W2553303224","https://openalex.org/W2737034911","https://openalex.org/W2897492880","https://openalex.org/W2903346628","https://openalex.org/W2946948417","https://openalex.org/W2963528589","https://openalex.org/W2963654155","https://openalex.org/W2964137095","https://openalex.org/W2969985801","https://openalex.org/W3016011581","https://openalex.org/W3034552680","https://openalex.org/W3035682985","https://openalex.org/W3094502228","https://openalex.org/W3162707322","https://openalex.org/W3165010056","https://openalex.org/W3174885539","https://openalex.org/W4200484186","https://openalex.org/W4213274822","https://openalex.org/W4214612132","https://openalex.org/W4224319127","https://openalex.org/W4225299282","https://openalex.org/W4225305629","https://openalex.org/W4312920990","https://openalex.org/W4315650277","https://openalex.org/W4316372770","https://openalex.org/W4319068597","https://openalex.org/W6681898093","https://openalex.org/W6683411478","https://openalex.org/W6687483927","https://openalex.org/W6734491695","https://openalex.org/W6739901393","https://openalex.org/W6745136726","https://openalex.org/W6757817989"],"related_works":["https://openalex.org/W4375867731","https://openalex.org/W2055243143","https://openalex.org/W2611989081","https://openalex.org/W4230611425","https://openalex.org/W2731899572","https://openalex.org/W4304166257","https://openalex.org/W4206178588","https://openalex.org/W4287635093","https://openalex.org/W3094491777","https://openalex.org/W3214715529"],"abstract_inverted_index":{"This":[0,43],"paper":[1,44],"studies":[2,35],"various":[3],"deep":[4,46,168],"learning":[5,18,47,169],"models":[6,48,145,161,170],"for":[7,162,190,196,200],"word-level":[8],"lip-reading":[9,30,38,164],"technology,":[10],"one":[11,79],"of":[12,19,80,97,114,186],"the":[13,16,29,58,81,94,112,115,136,159],"tasks":[14,165],"in":[15,28,57,74,91,139],"supervised":[17],"video":[20],"classification.":[21],"Several":[22],"public":[23,83],"datasets":[24,73,84,202],"have":[25,36,105],"been":[26,106],"published":[27],"research":[31,103],"field.":[32,76],"However,":[33],"few":[34],"investigated":[37],"techniques":[39],"using":[40,49,166],"multiple":[41],"datasets.":[42],"evaluates":[45],"four":[50,201],"publicly":[51],"available":[52],"datasets,":[53],"namely":[54],"Lip":[55],"Reading":[56],"Wild":[59],"(LRW),":[60],"OuluVS,":[61],"CUAVE,":[62],"and":[63,85,155,157,175,193],"Speech":[64],"Scene":[65],"by":[66,121],"Smart":[67],"Device":[68],"(SSSD),":[69],"which":[70],"are":[71,198],"representative":[72],"this":[75,140],"LRW":[77,98],"is":[78],"large-scale":[82],"targets":[86],"500":[87],"English":[88],"words":[89],"released":[90],"2016.":[92],"Initially,":[93],"recognition":[95,178],"accuracy":[96],"was":[99],"66.1%,":[100],"but":[101],"many":[102],"groups":[104],"working":[107],"on":[108],"it.":[109],"The":[110],"current":[111],"state":[113],"art":[116],"(SOTA)":[117],"has":[118],"achieved":[119],"94.1%":[120],"3D-Conv":[122,187],"+":[123,125,129,132,188],"ResNet18":[124,189],"{DC-TCN,":[126],"MS-TCN,":[127,152],"BGRU}":[128],"knowledge":[130],"distillation":[131],"word":[133,163],"boundary.":[134],"Regarding":[135],"SOTA":[137],"model,":[138],"paper,":[141],"we":[142,180],"combine":[143],"existing":[144],"such":[146],"as":[147],"ResNet,":[148],"WideResNet,":[149,150],"EfficientNet,":[151],"Transformer,":[153],"ViT,":[154],"ViViT,":[156],"investigate":[158],"effective":[160],"six":[167],"with":[171,203],"modified":[172],"feature":[173,191],"extractors":[174],"classifiers.":[176],"Through":[177],"experiments,":[179],"show":[181],"that":[182],"similar":[183],"model":[184,195],"structures":[185],"extraction":[192],"MS-TCN":[194],"inference":[197],"valid":[199],"different":[204],"scales.":[205]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":3}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2023-05-28T00:00:00"}
