{"id":"https://openalex.org/W2396843779","doi":"https://doi.org/10.21437/interspeech.2014-162","title":"Noise spectrum estimation using Gaussian mixture model-based speech presence probability for robust speech recognition","display_name":"Noise spectrum estimation using Gaussian mixture model-based speech presence probability for robust speech recognition","publication_year":2014,"publication_date":"2014-09-14","ids":{"openalex":"https://openalex.org/W2396843779","doi":"https://doi.org/10.21437/interspeech.2014-162","mag":"2396843779"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2014-162","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2014-162","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2014","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088749621","display_name":"Jahangir Alam","orcid":"https://orcid.org/0000-0003-4174-9862"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M. J. Alam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036466893","display_name":"Patrick Kenny","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patrick Kenny","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034083532","display_name":"Pierre Dumouchel","orcid":"https://orcid.org/0000-0001-5584-4428"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pierre Dumouchel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5011489869","display_name":"Douglas O\u2019Shaughnessy","orcid":"https://orcid.org/0000-0002-0110-2346"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Douglas O'Shaughnessy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2949,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62064045,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"2759","last_page":"2763"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7498965263366699},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.6837478876113892},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6197234392166138},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5665568113327026},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5353387594223022},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.5139655470848083},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.47471821308135986},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.4645644724369049},{"id":"https://openalex.org/keywords/maximum-a-posteriori-estimation","display_name":"Maximum a posteriori estimation","score":0.4247472882270813},{"id":"https://openalex.org/keywords/gaussian-noise","display_name":"Gaussian noise","score":0.4197508692741394},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3807632327079773},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2972185015678406},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.28154683113098145},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.20375320315361023},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.13835176825523376},{"id":"https://openalex.org/keywords/maximum-likelihood","display_name":"Maximum likelihood","score":0.08870604634284973}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7498965263366699},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.6837478876113892},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6197234392166138},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5665568113327026},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5353387594223022},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.5139655470848083},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.47471821308135986},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.4645644724369049},{"id":"https://openalex.org/C9810830","wikidata":"https://www.wikidata.org/wiki/Q635384","display_name":"Maximum a posteriori estimation","level":3,"score":0.4247472882270813},{"id":"https://openalex.org/C4199805","wikidata":"https://www.wikidata.org/wiki/Q2725903","display_name":"Gaussian noise","level":2,"score":0.4197508692741394},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3807632327079773},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2972185015678406},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.28154683113098145},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.20375320315361023},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13835176825523376},{"id":"https://openalex.org/C49781872","wikidata":"https://www.wikidata.org/wiki/Q1045555","display_name":"Maximum likelihood","level":2,"score":0.08870604634284973},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2014-162","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2014-162","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2014","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.49000000953674316}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W47839664","https://openalex.org/W72012584","https://openalex.org/W1492775261","https://openalex.org/W1533883318","https://openalex.org/W1969929748","https://openalex.org/W1974306283","https://openalex.org/W1995190897","https://openalex.org/W2088489891","https://openalex.org/W2103167563","https://openalex.org/W2115299561","https://openalex.org/W2142170306","https://openalex.org/W2144739192","https://openalex.org/W2146324387","https://openalex.org/W2148154194","https://openalex.org/W2158336491","https://openalex.org/W2169147844","https://openalex.org/W2169890399","https://openalex.org/W2401185733"],"related_works":["https://openalex.org/W2018086531","https://openalex.org/W1980297060","https://openalex.org/W2387604097","https://openalex.org/W2373675101","https://openalex.org/W4385672897","https://openalex.org/W106160982","https://openalex.org/W2359140082","https://openalex.org/W2074132948","https://openalex.org/W2160511961","https://openalex.org/W334008168"],"abstract_inverted_index":{"This":[0],"work":[1],"presents":[2],"a":[3,30,32,48],"noise":[4,23,85,112,176,190],"spectrum":[5,24,55,86,119],"estimator":[6],"based":[7,44],"on":[8,45,99],"the":[9,53,57,64,83,94,100],"Gaussian":[10],"mixture":[11],"model":[12],"(GMM)-based":[13],"speech":[14,20,54,97,103,146],"presence":[15],"probability":[16],"(SPP)":[17],"for":[18,70,142],"robust":[19,71,133],"recognition.":[21],"Estimated":[22],"is":[25,42,61,91,185],"then":[26],"used":[27,62],"to":[28,51],"compute":[29],"subband":[31,47],"posteriori":[33,49],"signal-to-noise":[34],"ratio":[35],"(SNR).":[36],"A":[37],"sigmoid":[38],"shape":[39],"weighting":[40],"rule":[41],"formed":[43],"this":[46,116],"SNR":[50],"enhance":[52],"in":[56,63,93,151,193],"auditory":[58,117],"domain,":[59],"which":[60],"Mel-frequency":[65],"cepstral":[66,129,136],"coefficient":[67],"(MFCC)":[68],"framework":[69],"feature,":[72],"denoted":[73],"here":[74],"as":[75],"Robust":[76],"MFCC":[77],"(RMFCC)":[78],"extraction.":[79],"The":[80,122],"performance":[81],"of":[82,96,153,162,183,195],"GMM-SPP":[84,174],"estimator-based":[87],"RMFCC":[88,156],"feature":[89],"extractor":[90],"evaluated":[92],"context":[95],"recognition":[98,104,147,197],"AURORA-4":[101],"continuous":[102],"task.":[105],"For":[106],"comparison":[107,143],"we":[108],"incorporate":[109],"six":[110,189],"existing":[111],"estimation":[113,177,191],"methods":[114,192],"into":[115],"domain":[118],"enhancement":[120],"framework.":[121],"ETSI":[123],"advanced":[124],"frontend":[125],"(ETSI-AFE),":[126],"power":[127],"normalized":[128],"coefficients":[130,137],"(PNCC),":[131],"and":[132,165,170],"compressive":[134],"gammachirp":[135],"(RCGCC)":[138],"are":[139],"also":[140],"considered":[141],"purposes.":[144],"Experimental":[145],"results":[148],"show":[149],"that,":[150],"terms":[152,194],"word":[154,196],"accuracy,":[155],"provides":[157],"an":[158,179],"average":[159,180],"relative":[160,181],"improvements":[161],"8.1%,":[163],"6.9%":[164],"6.6%":[166],"over":[167,187],"RCGCC,":[168],"ETSI-AFE,":[169],"PNCC,":[171],"respectively.":[172],"With":[173],"-based":[175],"method":[178],"improvement":[182],"3.6%":[184],"obtained":[186],"other":[188],"accuracy.":[198]},"counts_by_year":[{"year":2016,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
