{"id":"https://openalex.org/W3212576278","doi":"https://doi.org/10.1109/taslp.2021.3126947","title":"End-to-End Neural Based Modification of Noisy Speech for Speech-in-Noise Intelligibility Improvement","display_name":"End-to-End Neural Based Modification of Noisy Speech for Speech-in-Noise Intelligibility Improvement","publication_year":2021,"publication_date":"2021-11-10","ids":{"openalex":"https://openalex.org/W3212576278","doi":"https://doi.org/10.1109/taslp.2021.3126947","mag":"3212576278"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2021.3126947","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3126947","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020314306","display_name":"Muhammed P.V. Shifas","orcid":"https://orcid.org/0000-0002-2449-8507"},"institutions":[{"id":"https://openalex.org/I142617266","display_name":"University of Crete","ror":"https://ror.org/00dr28g20","country_code":"GR","type":"education","lineage":["https://openalex.org/I142617266"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Muhammed P.V. Shifas","raw_affiliation_strings":["Speech Signal Processing Laboratory (SSPL), Department of Computer Science, University of Crete, Heraklion, Greece"],"raw_orcid":"https://orcid.org/0000-0002-2449-8507","affiliations":[{"raw_affiliation_string":"Speech Signal Processing Laboratory (SSPL), Department of Computer Science, University of Crete, Heraklion, Greece","institution_ids":["https://openalex.org/I142617266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044462635","display_name":"C\u0103t\u0103lin Zoril\u0103","orcid":null},"institutions":[{"id":"https://openalex.org/I1292669757","display_name":"Toshiba (Japan)","ror":"https://ror.org/0326v3z14","country_code":"JP","type":"company","lineage":["https://openalex.org/I1292669757"]},{"id":"https://openalex.org/I4210150981","display_name":"Toshiba (United States)","ror":"https://ror.org/051vpgk97","country_code":"US","type":"company","lineage":["https://openalex.org/I1292669757","https://openalex.org/I4210150981"]}],"countries":["JP","US"],"is_corresponding":false,"raw_author_name":"Catalin Zorila","raw_affiliation_strings":["Toshiba Cambridge Research Laboratory, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Toshiba Cambridge Research Laboratory, U.K","institution_ids":["https://openalex.org/I4210150981","https://openalex.org/I1292669757"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035745788","display_name":"Yannis Stylianou","orcid":null},"institutions":[{"id":"https://openalex.org/I142617266","display_name":"University of Crete","ror":"https://ror.org/00dr28g20","country_code":"GR","type":"education","lineage":["https://openalex.org/I142617266"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Yannis Stylianou","raw_affiliation_strings":["Speech Signal Processing Laboratory (SSPL), Department of Computer Science, University of Crete, Heraklion, Greece"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Speech Signal Processing Laboratory (SSPL), Department of Computer Science, University of Crete, Heraklion, Greece","institution_ids":["https://openalex.org/I142617266"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3898,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.82057977,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"30","issue":null,"first_page":"162","last_page":"173"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10822","display_name":"Acoustic Wave Phenomena Research","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.7346786856651306},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7327730059623718},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6801027059555054},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.4980309009552002},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.46534761786460876},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4367290139198303},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.43036431074142456},{"id":"https://openalex.org/keywords/speech-perception","display_name":"Speech perception","score":0.417322039604187},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4013206958770752},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.3144981861114502},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.2579096555709839},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20238515734672546},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.08544528484344482}],"concepts":[{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.7346786856651306},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7327730059623718},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6801027059555054},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.4980309009552002},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.46534761786460876},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4367290139198303},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.43036431074142456},{"id":"https://openalex.org/C99209842","wikidata":"https://www.wikidata.org/wiki/Q643696","display_name":"Speech perception","level":3,"score":0.417322039604187},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4013206958770752},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.3144981861114502},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2579096555709839},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20238515734672546},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.08544528484344482},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2021.3126947","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3126947","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6600000262260437}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W95868112","https://openalex.org/W1522301498","https://openalex.org/W1897240248","https://openalex.org/W1934478182","https://openalex.org/W1972481648","https://openalex.org/W1977775600","https://openalex.org/W1986226432","https://openalex.org/W1997307245","https://openalex.org/W2022880580","https://openalex.org/W2056116740","https://openalex.org/W2075076415","https://openalex.org/W2078528584","https://openalex.org/W2128653836","https://openalex.org/W2129387557","https://openalex.org/W2141845431","https://openalex.org/W2149948223","https://openalex.org/W2153185504","https://openalex.org/W2157989118","https://openalex.org/W2163010125","https://openalex.org/W2168554752","https://openalex.org/W2171670416","https://openalex.org/W2171878011","https://openalex.org/W2293006933","https://openalex.org/W2404548911","https://openalex.org/W2405774341","https://openalex.org/W2514741789","https://openalex.org/W2558539285","https://openalex.org/W2626799581","https://openalex.org/W2748401291","https://openalex.org/W2749552851","https://openalex.org/W2771725120","https://openalex.org/W2889034131","https://openalex.org/W2889442120","https://openalex.org/W2890983311","https://openalex.org/W2962843322","https://openalex.org/W2963103134","https://openalex.org/W2963453742","https://openalex.org/W2963840672","https://openalex.org/W2973196014","https://openalex.org/W3035279149","https://openalex.org/W3096949857","https://openalex.org/W3100032392","https://openalex.org/W3103840368","https://openalex.org/W3209141406","https://openalex.org/W4234195863","https://openalex.org/W6628803911","https://openalex.org/W6631190155","https://openalex.org/W6632636894","https://openalex.org/W6632656430","https://openalex.org/W6696085341","https://openalex.org/W6779315888","https://openalex.org/W6785561240","https://openalex.org/W6802860302","https://openalex.org/W6843673214"],"related_works":["https://openalex.org/W1986772939","https://openalex.org/W2037635165","https://openalex.org/W3000153094","https://openalex.org/W2542062716","https://openalex.org/W2231565466","https://openalex.org/W3043214604","https://openalex.org/W1505346162","https://openalex.org/W2120771489","https://openalex.org/W2738829087","https://openalex.org/W4200596008"],"abstract_inverted_index":{"Intelligibility":[0],"of":[1,28,35,178],"speech":[2,29,65,124,160],"can":[3],"be":[4],"significantly":[5],"reduced":[6],"when":[7,51],"it":[8],"is":[9,66,110,131,136,154],"presented":[10],"in":[11,30,53,95,115,162,207,222],"adverse":[12,96],"near-end":[13],"listening":[14],"conditions,":[15],"like":[16,57],"background":[17],"noise.":[18],"Multiple":[19],"approaches":[20,37],"have":[21,48],"been":[22],"suggested":[23,195,215],"to":[24,40,227,238],"improve":[25],"the":[26,80,86,91,134,159,169,187,194,214,247],"perception":[27],"such":[31],"conditions.":[32,97],"However,":[33],"most":[34],"these":[36],"were":[38],"designed":[39],"work":[41],"with":[42,106,186,246],"clean":[43],"input":[44,64,87,192,232,243],"speech.":[45],"Therefore,":[46],"they":[47],"serious":[49],"limitations":[50],"deployed":[52],"real":[54],"world":[55],"applications":[56],"telephony":[58],"and":[59,89,112,118,166,202,209,234],"hearing":[60],"aids,":[61],"where":[62,133],"noisy":[63],"quite":[67],"common.":[68],"In":[69],"this":[70],"paper":[71],"we":[72],"present":[73],"an":[74],"end-to-end":[75],"neural":[76,103,196],"network":[77,104,197],"approach":[78],"for":[79,93],"above":[81],"problem,":[82],"which":[83],"effectively":[84],"reduces":[85],"noise":[88,174,224],"improves":[90],"intelligibility":[92,140,161],"listeners":[94],"To":[98],"that":[99],"end,":[100],"a":[101,116,119,137,183],"convolutional":[102],"topology":[105],"variable":[107],"dilation":[108],"factors":[109],"proposed":[111],"evaluated":[113],"both":[114,156],"causal":[117],"non-causal":[120],"configuration":[121],"using":[122,158],"raw":[123],"as":[125,182],"input.":[126],"A":[127,173],"Teacher-Student":[128],"training":[129],"strategy":[130],"employed,":[132],"Teacher":[135],"well-established":[138],"speech-in-noise":[139],"enhancer":[141],"based":[142],"on":[143,168],"spectral":[144],"shaping":[145],"followed":[146],"by":[147],"dynamic":[148],"range":[149],"compression":[150],"(SSDRC).":[151],"The":[152],"evaluation":[153],"performed":[155],"objectively":[157],"bits":[163],"metric":[164],"(SIIB),":[165],"subjectively":[167],"Greek":[170],"Harvard":[171],"corpus.":[172],"robust":[175],"multi-band":[176],"version":[177],"SSDRC":[179],"was":[180],"used":[181],"baseline.":[184,248],"Compared":[185],"baseline,":[188],"at":[189,229,240],"0":[190,230],"dB":[191,231,242],"SNR,":[193,233,244],"system":[198],"achieved":[199],"about":[200,236],"380%":[201],"230%":[203],"relative":[204],"SIIB":[205],"improvements":[206],"fluctuating":[208],"stationary":[210,223],"backgrounds,":[211],"respectively.":[212],"Subjectively,":[213],"model":[216],"increased":[217],"listeners\u2019":[218],"keyword":[219],"correct":[220],"rate":[221],"from":[225,235],"25%":[226],"60%":[228],"52%":[237],"75%":[239],"5":[241],"compared":[245]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
