{"id":"https://openalex.org/W7139960174","doi":"https://doi.org/10.1016/j.procs.2026.01.104","title":"Attention-Based LipNet Architectures for Robust Visual Speech Recognition in Multimodal Interfaces","display_name":"Attention-Based LipNet Architectures for Robust Visual Speech Recognition in Multimodal Interfaces","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7139960174","doi":"https://doi.org/10.1016/j.procs.2026.01.104"},"language":"en","primary_location":{"id":"doi:10.1016/j.procs.2026.01.104","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.104","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1016/j.procs.2026.01.104","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130214832","display_name":"Taher M. Ghazal","orcid":null},"institutions":[{"id":"https://openalex.org/I885383172","display_name":"National University of Malaysia","ror":"https://ror.org/00bw8d226","country_code":"MY","type":"education","lineage":["https://openalex.org/I885383172"]}],"countries":["MY"],"is_corresponding":true,"raw_author_name":"Taher M. Ghazal","raw_affiliation_strings":["Faculty of Computing and IT, Sohar University, Oman, Department of Networks and Cybersecurity, Hourani Center for Applied Scientific Research, Al-Ahliyya Amman University, Amman, Jordan. Center for Cyber Security, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia (UKM), 43600 Bangi, Selangor, Malaysia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing and IT, Sohar University, Oman, Department of Networks and Cybersecurity, Hourani Center for Applied Scientific Research, Al-Ahliyya Amman University, Amman, Jordan. Center for Cyber Security, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia (UKM), 43600 Bangi, Selangor, Malaysia","institution_ids":["https://openalex.org/I885383172"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130242660","display_name":"Reem Al Maawali","orcid":null},"institutions":[{"id":"https://openalex.org/I45998257","display_name":"Sohar University","ror":"https://ror.org/02ftvf862","country_code":"OM","type":"education","lineage":["https://openalex.org/I45998257"]}],"countries":["OM"],"is_corresponding":false,"raw_author_name":"Reem Al Maawali","raw_affiliation_strings":["Faculty of Computing and Information Technology, Sohar University, Sultanate of Oman"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing and Information Technology, Sohar University, Sultanate of Oman","institution_ids":["https://openalex.org/I45998257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060666138","display_name":"Falah Y. H. Ahmed","orcid":"https://orcid.org/0000-0003-2094-4028"},"institutions":[{"id":"https://openalex.org/I45998257","display_name":"Sohar University","ror":"https://ror.org/02ftvf862","country_code":"OM","type":"education","lineage":["https://openalex.org/I45998257"]}],"countries":["OM"],"is_corresponding":false,"raw_author_name":"Falah Y H Ahmed","raw_affiliation_strings":["Faculty of Computing and Information Technology, Sohar University, Sultanate of Oman"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing and Information Technology, Sohar University, Sultanate of Oman","institution_ids":["https://openalex.org/I45998257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026358796","display_name":"Eimad Abusham","orcid":"https://orcid.org/0000-0002-4895-2057"},"institutions":[{"id":"https://openalex.org/I45998257","display_name":"Sohar University","ror":"https://ror.org/02ftvf862","country_code":"OM","type":"education","lineage":["https://openalex.org/I45998257"]}],"countries":["OM"],"is_corresponding":false,"raw_author_name":"Eimad Abusham","raw_affiliation_strings":["Faculty of Computing and Information Technology, Sohar University, Sultanate of Oman"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing and Information Technology, Sohar University, Sultanate of Oman","institution_ids":["https://openalex.org/I45998257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002040454","display_name":"Amjed Abbas Ahmed","orcid":"https://orcid.org/0000-0001-6069-2967"},"institutions":[{"id":"https://openalex.org/I885383172","display_name":"National University of Malaysia","ror":"https://ror.org/00bw8d226","country_code":"MY","type":"education","lineage":["https://openalex.org/I885383172"]}],"countries":["MY"],"is_corresponding":true,"raw_author_name":"Amjed Abbas Ahmed","raw_affiliation_strings":["Center for Artificial Intelligent Technology, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia, Bangi 43600, Imam Al Kadhum College (IKC), Department of Computer Science, University of Technology, Baghdad 10066, Iraq"],"affiliations":[{"raw_affiliation_string":"Center for Artificial Intelligent Technology, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia, Bangi 43600, Imam Al Kadhum College (IKC), Department of Computer Science, University of Technology, Baghdad 10066, Iraq","institution_ids":["https://openalex.org/I885383172"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5002040454","https://openalex.org/A5130214832"],"corresponding_institution_ids":["https://openalex.org/I885383172"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.94966561,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":"275","issue":null,"first_page":"915","last_page":"922"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.006899999920278788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.0020000000949949026,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.3395000100135803},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.32170000672340393},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.3165999948978424},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.3158999979496002},{"id":"https://openalex.org/keywords/user-interface","display_name":"User interface","score":0.31060001254081726},{"id":"https://openalex.org/keywords/multimodal-interaction","display_name":"Multimodal interaction","score":0.30469998717308044}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9222000241279602},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6151000261306763},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4537999927997589},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44760000705718994},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.35839998722076416},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.3395000100135803},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33480000495910645},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.32170000672340393},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.3165999948978424},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3158999979496002},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.31060001254081726},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3003000020980835},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.2955000102519989},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.2888000011444092},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.28859999775886536},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2660999894142151}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.procs.2026.01.104","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.104","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.procs.2026.01.104","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.104","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W3000050145","https://openalex.org/W3111240127","https://openalex.org/W3217109194","https://openalex.org/W4210475104","https://openalex.org/W4224946905","https://openalex.org/W4293564052","https://openalex.org/W4297904486","https://openalex.org/W4321615128","https://openalex.org/W4322740421","https://openalex.org/W4362566179","https://openalex.org/W4379033920","https://openalex.org/W4385067602","https://openalex.org/W4387805894","https://openalex.org/W4390506533","https://openalex.org/W4390858868","https://openalex.org/W4391431903","https://openalex.org/W4391610999","https://openalex.org/W4392466655","https://openalex.org/W4394887488","https://openalex.org/W4394929443","https://openalex.org/W4395675494","https://openalex.org/W4396553703","https://openalex.org/W4396601469","https://openalex.org/W4396632288","https://openalex.org/W4396680728","https://openalex.org/W4398185455","https://openalex.org/W4399457811","https://openalex.org/W4399563613","https://openalex.org/W4400526454","https://openalex.org/W4403104615","https://openalex.org/W4403111444","https://openalex.org/W4403938618","https://openalex.org/W4409103185","https://openalex.org/W4409274446","https://openalex.org/W4411053797","https://openalex.org/W4411968505","https://openalex.org/W4412044520","https://openalex.org/W4413001513"],"related_works":[],"abstract_inverted_index":{"Visual":[0,43],"Speech":[1],"Recognition":[2],"(VSR)":[3],"is":[4],"crucial":[5],"in":[6,13],"multimodal":[7,141],"human-computer":[8],"interactions":[9],"for":[10,17,144],"speech":[11],"interpretation":[12],"noisy":[14],"environments":[15],"or":[16],"users":[18],"with":[19,26],"hearing":[20],"impairments.":[21],"Traditional":[22],"VSR":[23],"models":[24],"struggle":[25],"temporal":[27,74],"unpredictability,":[28],"speaker-dependent":[29],"lip":[30,68],"movements,":[31],"and":[32,61,71,80,85,98,112,122,129,139,151],"contextual":[33],"ambiguity.":[34],"To":[35],"address":[36],"these":[37],"issues,":[38],"this":[39],"paper":[40],"presents":[41],"the":[42],"Attention":[44],"LipNet":[45,111],"Network":[46],"(V-LipNet),":[47],"which":[48],"utilizes":[49],"Particle":[50],"Swarm":[51],"Optimization":[52],"(PSO)":[53],"to":[54,89,119],"adjust":[55],"learning":[56],"rates,":[57],"convolutional":[58,82],"filter":[59],"sizes,":[60],"attention":[62,130],"weights.":[63],"V-LipNet":[64,105,135],"dynamically":[65],"focuses":[66],"on":[67,92],"movement":[69],"features":[70],"captures":[72],"long-range":[73],"associations":[75],"via":[76],"a":[77,137],"self-attention":[78],"mechanism":[79],"spatiotemporal":[81],"layers.":[83],"WER":[84,107],"SA":[86],"were":[87],"used":[88],"evaluate":[90],"performance":[91],"benchmark":[93],"datasets,":[94],"including":[95],"GRID,":[96],"TCD-TIMIT,":[97],"LRS2.":[99],"The":[100],"findings":[101],"reveal":[102,125],"that":[103,115,126],"PSO-optimized":[104],"reduces":[106],"better":[108],"than":[109],"traditional":[110],"LSTM,":[113],"indicating":[114],"it":[116],"can":[117],"generalize":[118],"unseen":[120],"speakers":[121],"noise.":[123],"Results":[124],"metaheuristic":[127],"optimization":[128],"processes":[131],"work":[132],"effectively.":[133],"Ultimately,":[134],"provides":[136],"powerful":[138],"adaptable":[140],"interface":[142],"solution":[143],"assistive":[145],"technology,":[146],"language":[147],"learning,":[148],"communication":[149],"networks,":[150],"human-robot":[152],"interaction.":[153]},"counts_by_year":[],"updated_date":"2026-03-22T06:25:25.174409","created_date":"2026-03-21T00:00:00"}
