{"id":"https://openalex.org/W2987307811","doi":"https://doi.org/10.1109/waspaa.2019.8937202","title":"Intrusive and Non-Intrusive Perceptual Speech Quality Assessment Using a Convolutional Neural Network","display_name":"Intrusive and Non-Intrusive Perceptual Speech Quality Assessment Using a Convolutional Neural Network","publication_year":2019,"publication_date":"2019-10-01","ids":{"openalex":"https://openalex.org/W2987307811","doi":"https://doi.org/10.1109/waspaa.2019.8937202","mag":"2987307811"},"language":"en","primary_location":{"id":"doi:10.1109/waspaa.2019.8937202","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa.2019.8937202","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036868085","display_name":"Hannes Gamper","orcid":"https://orcid.org/0000-0002-2148-6367"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hannes Gamper","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001022750","display_name":"Chandan K. Reddy","orcid":"https://orcid.org/0000-0003-2839-3662"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chandan K A Reddy","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068040769","display_name":"Ross Cutler","orcid":"https://orcid.org/0000-0002-2004-3003"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ross Cutler","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007425970","display_name":"Ivan Tashev","orcid":"https://orcid.org/0000-0002-2263-2047"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ivan J. Tashev","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023537206","display_name":"Johannes Gehrke","orcid":"https://orcid.org/0009-0006-6293-5209"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Johannes Gehrke","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.5089,"has_fulltext":false,"cited_by_count":62,"citation_normalized_percentile":{"value":0.97425285,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"85","last_page":"89"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pesq","display_name":"PESQ","score":0.9638331532478333},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7721163034439087},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7384122014045715},{"id":"https://openalex.org/keywords/mean-opinion-score","display_name":"Mean opinion score","score":0.708331823348999},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.6552446484565735},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6164107918739319},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.507398784160614},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5047849416732788},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4813593029975891},{"id":"https://openalex.org/keywords/mean-squared-error","display_name":"Mean squared error","score":0.44873976707458496},{"id":"https://openalex.org/keywords/telephony","display_name":"Telephony","score":0.43136030435562134},{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.4274156391620636},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3266592025756836},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.27305901050567627},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.1475522816181183},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.12029621005058289},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10333982110023499},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08726760745048523}],"concepts":[{"id":"https://openalex.org/C103734657","wikidata":"https://www.wikidata.org/wiki/Q2739975","display_name":"PESQ","level":4,"score":0.9638331532478333},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7721163034439087},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7384122014045715},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.708331823348999},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.6552446484565735},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6164107918739319},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.507398784160614},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5047849416732788},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4813593029975891},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.44873976707458496},{"id":"https://openalex.org/C195358072","wikidata":"https://www.wikidata.org/wiki/Q944584","display_name":"Telephony","level":2,"score":0.43136030435562134},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.4274156391620636},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3266592025756836},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.27305901050567627},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.1475522816181183},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.12029621005058289},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10333982110023499},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08726760745048523},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/waspaa.2019.8937202","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa.2019.8937202","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5199999809265137}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W1582976041","https://openalex.org/W1789794644","https://openalex.org/W1955090795","https://openalex.org/W2103934944","https://openalex.org/W2108708552","https://openalex.org/W2111072639","https://openalex.org/W2124541815","https://openalex.org/W2125114513","https://openalex.org/W2342810974","https://openalex.org/W2513383847","https://openalex.org/W2889830381","https://openalex.org/W2922332774","https://openalex.org/W2964121744","https://openalex.org/W6631190155","https://openalex.org/W6638142982"],"related_works":["https://openalex.org/W2988616598","https://openalex.org/W2126270861","https://openalex.org/W2974831423","https://openalex.org/W4285786300","https://openalex.org/W2144085034","https://openalex.org/W59212688","https://openalex.org/W2243478085","https://openalex.org/W1854341237","https://openalex.org/W2166153901","https://openalex.org/W2151415191"],"abstract_inverted_index":{"Speech":[0],"quality,":[1],"as":[2,32,37],"perceived":[3,56],"by":[4,93],"humans,":[5],"is":[6,17,80,107],"an":[7],"important":[8],"performance":[9],"metric":[10],"for":[11,41,103],"telephony":[12],"and":[13,28,34,63,67,71,82,122,139],"voice":[14],"services.":[15],"It":[16,106],"typically":[18],"measured":[19],"through":[20],"subjective":[21,42],"listening":[22,43],"tests,":[23],"which":[24],"can":[25],"be":[26],"tedious":[27],"expensive.":[29],"Algorithms":[30],"such":[31],"PESQ":[33,121],"POLQA":[35,152],"serve":[36],"a":[38,49,73,85,98],"computational":[39],"proxy":[40],"tests.":[44],"Here":[45],"we":[46],"propose":[47],"using":[48],"convolutional":[50],"neural":[51],"network":[52,78],"to":[53,96,109,135,146,149],"predict":[54],"the":[55,128,141,150],"quality":[57,118],"of":[58,87],"speech":[59,117],"with":[60,70],"noise,":[61],"reverberation,":[62],"distortions,":[64],"both":[65],"intrusively":[66],"non-intrusively,":[68],"i.e.,":[69],"without":[72],"clean":[74],"reference":[75],"signal.":[76],"The":[77,124],"model":[79],"trained":[81],"evaluated":[83],"on":[84],"corpus":[86],"about":[88],"ten":[89],"thousand":[90],"utterances":[91],"labeled":[92],"human":[94],"listeners":[95],"derive":[97],"Mean":[99],"Opinion":[100],"Score":[101],"(MOS)":[102],"each":[104],"utterance.":[105],"shown":[108],"provide":[110],"more":[111],"accurate":[112],"MOS":[113,137],"estimates":[114],"than":[115],"existing":[116],"metrics,":[119],"including":[120],"POLQA.":[123],"proposed":[125],"method":[126],"reduces":[127],"root":[129],"mean":[130],"squared":[131],"error":[132],"from":[133,144],"0.48":[134],"0.35":[136],"points":[138],"increases":[140],"Pearson":[142],"correlation":[143],"0.78":[145],"0.89":[147],"compared":[148],"state-of-the-art":[151],"algorithm.":[153]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":17},{"year":2021,"cited_by_count":15},{"year":2020,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
