{"id":"https://openalex.org/W4417130949","doi":"https://doi.org/10.1109/jbhi.2025.3618998","title":"Optimizing Deep Neural Networks for EEG-Based Speech Recognition: A Multimodal Approach to Assistive Communication","display_name":"Optimizing Deep Neural Networks for EEG-Based Speech Recognition: A Multimodal Approach to Assistive Communication","publication_year":2025,"publication_date":"2025-12-01","ids":{"openalex":"https://openalex.org/W4417130949","doi":"https://doi.org/10.1109/jbhi.2025.3618998","pmid":"https://pubmed.ncbi.nlm.nih.gov/41359692"},"language":"en","primary_location":{"id":"doi:10.1109/jbhi.2025.3618998","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jbhi.2025.3618998","pdf_url":null,"source":{"id":"https://openalex.org/S2495854775","display_name":"IEEE Journal of Biomedical and Health Informatics","issn_l":"2168-2194","issn":["2168-2194","2168-2208"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Biomedical and Health Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036034521","display_name":"Anarghya Das","orcid":"https://orcid.org/0000-0001-8146-2482"},"institutions":[{"id":"https://openalex.org/I63190737","display_name":"University at Buffalo, State University of New York","ror":"https://ror.org/01y64my43","country_code":"US","type":"education","lineage":["https://openalex.org/I63190737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anarghya Das","raw_affiliation_strings":["Department of Computer Science and Engineering, University at Buffalo, Buffalo, NY, USA"],"raw_orcid":"https://orcid.org/0000-0001-8146-2482","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University at Buffalo, Buffalo, NY, USA","institution_ids":["https://openalex.org/I63190737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067447384","display_name":"Puru Soni","orcid":"https://orcid.org/0009-0006-2267-2256"},"institutions":[{"id":"https://openalex.org/I63190737","display_name":"University at Buffalo, State University of New York","ror":"https://ror.org/01y64my43","country_code":"US","type":"education","lineage":["https://openalex.org/I63190737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Puru Soni","raw_affiliation_strings":["Department of Computer Science and Engineering, University at Buffalo, Buffalo, NY, USA"],"raw_orcid":"https://orcid.org/0009-0006-2267-2256","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University at Buffalo, Buffalo, NY, USA","institution_ids":["https://openalex.org/I63190737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016060123","display_name":"Hubin Zhao","orcid":"https://orcid.org/0000-0001-9408-4724"},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Hubin Zhao","raw_affiliation_strings":["Department of Medical Physics and Biomedical Engineering, University College London, London, U.K"],"raw_orcid":"https://orcid.org/0000-0001-9408-4724","affiliations":[{"raw_affiliation_string":"Department of Medical Physics and Biomedical Engineering, University College London, London, U.K","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020169895","display_name":"Ming-Chun Huang","orcid":"https://orcid.org/0000-0002-2269-4694"},"institutions":[{"id":"https://openalex.org/I4210159968","display_name":"Duke Kunshan University","ror":"https://ror.org/04sr5ys16","country_code":"CN","type":"education","lineage":["https://openalex.org/I170897317","https://openalex.org/I37461747","https://openalex.org/I4210159968"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming-Chun Huang","raw_affiliation_strings":["Department of Data and Computational Science, Duke Kunshan University, Kunshan, Jiangsu, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Data and Computational Science, Duke Kunshan University, Kunshan, Jiangsu, China","institution_ids":["https://openalex.org/I4210159968"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035679293","display_name":"Wenyao Xu","orcid":"https://orcid.org/0000-0001-6444-9411"},"institutions":[{"id":"https://openalex.org/I63190737","display_name":"University at Buffalo, State University of New York","ror":"https://ror.org/01y64my43","country_code":"US","type":"education","lineage":["https://openalex.org/I63190737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenyao Xu","raw_affiliation_strings":["Department of Computer Science and Engineering, University at Buffalo, Buffalo, NY, USA"],"raw_orcid":"https://orcid.org/0000-0001-6444-9411","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University at Buffalo, Buffalo, NY, USA","institution_ids":["https://openalex.org/I63190737"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34635967,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"29","issue":"12","first_page":"8735","last_page":"8742"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.6072999835014343,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.6072999835014343,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.1054999977350235,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.10260000079870224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.765500009059906},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5837000012397766},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5713000297546387},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5356000065803528},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.46939998865127563},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4260999858379364},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4115999937057495},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4065000116825104},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4034000039100647}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8258000016212463},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.765500009059906},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7001000046730042},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5837000012397766},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5713000297546387},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5356000065803528},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.46939998865127563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45969998836517334},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4260999858379364},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4115999937057495},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4065000116825104},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4034000039100647},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4004000127315521},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.36570000648498535},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.359499990940094},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35019999742507935},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3402000069618225},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3319999873638153},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.313400000333786},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2816999852657318},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.27079999446868896},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.26440000534057617},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.25440001487731934},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25270000100135803}],"mesh":[{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000328","descriptor_name":"Adult","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000328","descriptor_name":"Adult","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003143","descriptor_name":"Communication Devices for People with Disabilities","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003143","descriptor_name":"Communication Devices for People with Disabilities","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D004569","descriptor_name":"Electroencephalography","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D004569","descriptor_name":"Electroencephalography","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008297","descriptor_name":"Male","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008297","descriptor_name":"Male","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012815","descriptor_name":"Signal Processing, Computer-Assisted","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012815","descriptor_name":"Signal Processing, Computer-Assisted","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D049250","descriptor_name":"Speech Recognition Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D049250","descriptor_name":"Speech Recognition Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1109/jbhi.2025.3618998","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jbhi.2025.3618998","pdf_url":null,"source":{"id":"https://openalex.org/S2495854775","display_name":"IEEE Journal of Biomedical and Health Informatics","issn_l":"2168-2194","issn":["2168-2194","2168-2208"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Biomedical and Health Informatics","raw_type":"journal-article"},{"id":"pmid:41359692","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41359692","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE journal of biomedical and health informatics","raw_type":null},{"id":"pmh:oai:eprints.ucl.ac.uk.OAI2:10221142","is_oa":false,"landing_page_url":"https://discovery.ucl.ac.uk/id/eprint/10221142/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400024","display_name":"UCL Discovery (University College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45129253","host_organization_name":"University College London","host_organization_lineage":["https://openalex.org/I45129253"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Journal of Biomedical and Health Informatics , 29  (12)   pp. 8735-8742.   (2025)","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1599001363","https://openalex.org/W2580499487","https://openalex.org/W2978943562","https://openalex.org/W3100732527","https://openalex.org/W3104295320","https://openalex.org/W3157504026","https://openalex.org/W4384008501","https://openalex.org/W4406128038","https://openalex.org/W4408101854"],"related_works":[],"abstract_inverted_index":{"Speech":[0],"recognition":[1,35],"for":[2,203,209],"individuals":[3,210],"with":[4,30,53,168,211],"impairments":[5],"remains":[6],"a":[7,23,135,142,200],"significant":[8,187],"challenge":[9],"due":[10],"to":[11,33,156,189,193],"atypical":[12],"speech":[13,195,212],"patterns":[14],"thatconfound":[15],"traditional":[16],"acoustic-only":[17],"models.":[18],"This":[19],"study":[20],"introduces":[21],"NeuroSpeech,":[22],"novel":[24],"multimodal":[25],"framework":[26],"that":[27,162,196],"integrateselectroencephalography":[28],"(EEG)":[29],"acoustic":[31],"features":[32],"improve":[34],"accuracy,":[36],"robustness,":[37],"and":[38,49,56,78,91,94,97,129,178,206],"efficiency.":[39],"A":[40],"large-scale":[41],"random":[42],"search":[43],"identified":[44],"optimal":[45],"EEG":[46],"encoder":[47],"configurations":[48],"feature":[50],"extraction":[51],"parameters,":[52],"window":[54],"size":[55],"overlap":[57],"($p":[58],"<":[59],"0.001$)":[60],"emerging":[61],"as":[62,134],"critical":[63],"factors.":[64],"Explainable":[65],"AI":[66],"(XAI)":[67],"methods,":[68],"specifically":[69],"SHAP,":[70],"provided":[71],"insights":[72],"into":[73],"model":[74],"decision-making,":[75],"supporting":[76],"interpretability":[77],"clinical":[79],"translation.":[80],"Evaluations":[81],"were":[82],"conducted":[83],"on":[84,110,113,176],"two":[85],"publicly":[86],"available":[87],"datasets:":[88],"Spanish":[89],"commands":[90],"vowels":[92],"(UNLP-CONICET)":[93],"English":[95],"phonemes":[96],"words":[98],"(KaraOne).":[99],"Under":[100],"clean":[101],"conditions,":[102],"NeuroSpeech":[103,163],"achieved":[104],"near-perfect":[105],"accuracy":[106],"($F1":[107,126],"=":[108,120,127],"0.986$":[109],"Spanish;":[111],"0.837":[112],"English),":[114],"while":[115],"in":[116],"noisy":[117],"conditions":[118],"(SNR":[119],"0.5)":[121],"it":[122],"maintained":[123],"strong":[124],"performance":[125],"0.92$":[128],"0.70),":[130],"demonstrating":[131],"EEG's":[132],"role":[133],"noise-robust":[136],"complementary":[137],"signal.":[138],"In":[139],"contrast,":[140],"Whisper,":[141],"state-of-the-art":[143],"ASR":[144],"model,":[145],"showed":[146,161],"severe":[147],"degradation":[148],"under":[149],"noise":[150],"(e.g.,":[151],"$F1$":[152],"dropping":[153],"from":[154],"0.81":[155],"0.46).":[157],"Finally,":[158],"complexity":[159],"analysis":[160],"is":[164,197],"lightweight":[165],"(1-30M":[166],"parameters)":[167],"inference":[169],"latency":[170],"of":[171],"10-18ms/sample":[172],"(RTF":[173],"$<":[174],"1$":[175],"CPU":[177],"GPU),":[179],"enabling":[180],"near-real-time":[181],"deployment.":[182],"These":[183],"results":[184],"demonstrate":[185],"NeuroSpeech's":[186],"potential":[188],"leverage":[190],"neural":[191],"information":[192],"augment":[194],"compromised,":[198],"offering":[199],"promising":[201],"advancement":[202],"assistive":[204],"technologies":[205],"improved":[207],"communication":[208],"disorders.":[213]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-08T00:00:00"}
