{"id":"https://openalex.org/W310658760","doi":"https://doi.org/10.21437/eurospeech.2001-384","title":"Robust automatic speech recognition in low-SNR car environments by the application of a connectionist subspace-based approach to the melbased cepstral coefficients","display_name":"Robust automatic speech recognition in low-SNR car environments by the application of a connectionist subspace-based approach to the melbased cepstral coefficients","publication_year":2001,"publication_date":"2001-09-03","ids":{"openalex":"https://openalex.org/W310658760","doi":"https://doi.org/10.21437/eurospeech.2001-384","mag":"310658760"},"language":"en","primary_location":{"id":"doi:10.21437/eurospeech.2001-384","is_oa":false,"landing_page_url":"https://doi.org/10.21437/eurospeech.2001-384","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"7th European Conference on Speech Communication and Technology (Eurospeech 2001)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055849526","display_name":"Sid\u2010Ahmed Selouani","orcid":"https://orcid.org/0000-0003-0731-2632"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sid-Ahmed Selouani","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110302454","display_name":"Hesham Tolba","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hesham Tolba","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5084152968","display_name":"Douglas O\u0092Shaughnessy","orcid":null},"institutions":[{"id":"https://openalex.org/I49663120","display_name":"Universit\u00e9 du Qu\u00e9bec","ror":"https://ror.org/010gxg263","country_code":"CA","type":"education","lineage":["https://openalex.org/I49663120"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Douglas O\u0092Shaughnessy","raw_affiliation_strings":["Univ. of Qu\u00e9bec"],"affiliations":[{"raw_affiliation_string":"Univ. of Qu\u00e9bec","institution_ids":["https://openalex.org/I49663120"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5055849526"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3031,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.56575279,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1577","last_page":"1580"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7113584280014038},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6677536964416504},{"id":"https://openalex.org/keywords/timit","display_name":"TIMIT","score":0.6002070903778076},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5952885150909424},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5628310441970825},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.5550560355186462},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5517088770866394},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.5296242833137512},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.4693544805049896},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.4692169427871704},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4632467031478882},{"id":"https://openalex.org/keywords/multilayer-perceptron","display_name":"Multilayer perceptron","score":0.4515010416507721},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.44919347763061523},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.4304966330528259},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.4227410852909088},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.29273784160614014},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.26723650097846985}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7113584280014038},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6677536964416504},{"id":"https://openalex.org/C2778724510","wikidata":"https://www.wikidata.org/wiki/Q7670405","display_name":"TIMIT","level":3,"score":0.6002070903778076},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5952885150909424},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5628310441970825},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.5550560355186462},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5517088770866394},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.5296242833137512},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.4693544805049896},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.4692169427871704},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4632467031478882},{"id":"https://openalex.org/C179717631","wikidata":"https://www.wikidata.org/wiki/Q2991667","display_name":"Multilayer perceptron","level":3,"score":0.4515010416507721},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.44919347763061523},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.4304966330528259},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.4227410852909088},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.29273784160614014},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.26723650097846985},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/eurospeech.2001-384","is_oa":false,"landing_page_url":"https://doi.org/10.21437/eurospeech.2001-384","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"7th European Conference on Speech Communication and Technology (Eurospeech 2001)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W22549796","https://openalex.org/W192531825","https://openalex.org/W1488642479","https://openalex.org/W2023963201","https://openalex.org/W2038054257","https://openalex.org/W2080921589","https://openalex.org/W2101349570","https://openalex.org/W2109816922","https://openalex.org/W2144521324","https://openalex.org/W2467500531"],"related_works":["https://openalex.org/W2018086531","https://openalex.org/W1980297060","https://openalex.org/W2387604097","https://openalex.org/W2373675101","https://openalex.org/W4385672897","https://openalex.org/W106160982","https://openalex.org/W2359140082","https://openalex.org/W2074132948","https://openalex.org/W2160511961","https://openalex.org/W2597829360"],"abstract_inverted_index":{"ABSTRACTIn":[0],"this":[1,210,313,368,426],"paper,":[2,211,369],"the":[3,12,29,37,56,66,87,95,99,127,136,172,190,216,304,316,323,326,349,390,397,402,457,465,468,472,491,494,496,502,505,514,522,525,550,559,562,569,573,584,587,596,602,608,622],"problem":[4],"of":[5,14,28,46,86,98,135,159,171,177,192,196,218,246,306,318,336,359,401,410,449,467,493,504,516,561,575,619],"robust":[6,230,360,374],"large-vocabulary":[7],"continuous-speech":[8],"recognition":[9,96,138,149],"(CSR)":[10],"in":[11,55,59,102,126,150,303,364,389,417,425,487,499,528,534,564,568,591,595],"presence":[13,191,305],"highly":[15,103,283,565],"interferingcar":[16],"noise":[17,53,106,206,252,386,436,585],"has":[18,414,554],"been":[19,202,259,555],"considered.":[20],"Our":[21,383],"approach":[22,93,384,444],"is":[23,42,233,394,572,621],"based":[24,140,395],"on":[25,141,184,214,396,430],"thenoise":[26],"reduction":[27,387,437],"parameters":[30,89,269,337,361],"that":[31,134,291,329,433,474,553,583],"we":[32,212,370,463,485,537],"use":[33,45,85,574],"for":[34,52,155,204,294,385],"recognition,that":[35],"is,":[36],"Mel-based":[38],"cepstral":[39,57,292,391,435,570,597],"coef\ufb01cients.":[40],"This":[41],"achieved":[43],"bythe":[44],"a":[47,143,156,168,193,219,223,372,434,509,518,576,592],"Multilayer":[48,545],"Perceptron":[49,546],"(MLP)":[50,579],"network":[51,470],"re-duction":[54],"domain":[58,571],"order":[60],"to":[61,234,244,250,261,273,276,285,341,377,478,520,557,600,612],"get":[62],"less-variant":[63],"pa-rameters.":[64],"Then,":[65,484],"obtained":[67,440],"enhanced":[68,88],"features":[69,237,264,281],"are":[70,270,282,297,339,589],"re\ufb01ned":[71],"viathe":[72],"Karhunen-Lo`eve":[73,403],"Transform":[74,404],"(KLT)":[75,405],"implemented":[76],"using":[77,90,167],"thePrincipal":[78],"Component":[79],"Analysis":[80],"(PCA).":[81],"Experiments":[82],"show":[83,119,424],"thatthe":[84,120],"such":[91,442,517],"an":[92,129,146,319,443,447],"in-creases":[94],"rate":[97],"CSR":[100,131,179,320,375],"process":[101,139],"inter-fering":[104],"car":[105,153,380,511],"environments.":[107,382],"The":[108,227,581,617],"HTK":[109],"Hidden":[110],"Markov":[111],"ModelToolkit":[112],"was":[113,288],"used":[114,260,416,477,498],"throughout":[115],"our":[116,480,500,540],"experiments.":[117],"Results":[118],"proposed":[121,481,506,556],"hybrid":[122],"technique":[123],"when":[124],"included":[125],"front-endof":[128],"HTK-based":[130],"system,":[132],"outperforms":[133],"conven-tional":[137],"either":[142,348],"KLT-":[144],"or":[145,253,352],"MLP-based":[147],"preprocessing":[148],"severe":[151],"interfering":[152,251],"noiseenvironments":[154],"wide":[157],"range":[158],"SNRs":[160],"varying":[161],"from":[162,333],"16":[163],"dB":[164,166],"to-4":[165],"noisy":[169,381,510,566,623],"version":[170],"TIMIT":[173],"database.1.":[174],"INTRODUCTIONThe":[175],"performance":[176,217,560],"existing":[178],"systems,":[180],"whose":[181],"designs":[182],"arepredicated":[183],"relatively":[185],"noise-free":[186],"conditions,":[187],"degrades":[188,315],"rapidlyin":[189],"high":[194],"level":[195],"adverse":[197,350],"conditions.":[198],"Several":[199],"ap-proaches":[200,256],"have":[201,258],"studied":[203],"achieving":[205],"robustness":[207],"[1,":[208],"2].In":[209],"focus":[213],"optimizing":[215],"CSRsystem":[220],"by":[221,347],"choosing":[222],"suitable":[224],"distortion":[225,345],"measure.":[226],"idea":[228],"ofa":[229,399],"distance":[231],"measure":[232],"extract":[235,262],"relevant":[236,263],"fromspeech":[238],"signals":[239],"which":[240,338],"must":[241],"be":[242,378,439,454,476],"insensitive":[243],"degradations":[245],"thespeech":[247],"signal":[248],"due":[249,275],"distortions.":[254],"Many":[255],"[3]":[257],"froma":[265],"speech":[266,419],"signal.":[267],"Cepstral":[268],"well":[271,298],"suited":[272],"speechrecognition":[274],"their":[277,308],"compact":[278],"orthogonality.":[279],"Unfortunately,cepstral":[280],"sensitive":[284],"noise.":[286],"It":[287,393],"shown":[289],"in[4]":[290],"distributions":[293],"clean":[295],"data":[296,432],"behaved":[299],"andapproximately":[300],"normal,":[301],"but":[302],"noise,":[307],"pro\ufb01lesare":[309],"changed":[310],"signi\ufb01cantly":[311],"and":[312,406,421,445,471,513,524,586],"consequently":[314,446],"per-formance":[317],"system.":[321],"However,":[322],"cepstrum":[324],"coef\ufb01cientshave":[325],"additional":[327],"advantage":[328],"one":[330],"can":[331,438,606],"derive":[332],"them":[334],"aset":[335],"invariant":[340],"any":[342],"\ufb01xed":[343],"frequency-response":[344],"introduced":[346,363],"environ-ments":[351],"thetransmission":[353],"channels.":[354],"Severalapproaches":[355],"toobtaina":[356],"new":[357],"set":[358],"were":[362],"[5,":[365],"6,":[366],"7].In":[367],"propose":[371],"novel":[373],"system":[376],"usedin":[379],"isapplied":[388],"domain.":[392],"application":[398],"combination":[400],"aConnectionist":[407],"approach.":[408,483],"Each":[409],"these":[411],"two":[412],"approaches":[413],"beensuccessfully":[415],"both":[418,521],"enhancement":[420],"recognitionprocesses.":[422],"We":[423],"paper":[427,452],"through":[428],"experiments":[429],"highlynoisy":[431],"us-ing":[441],"improvement":[448],"therecognition":[450],"performance.This":[451],"will":[453,475],"organized":[455],"into":[456],"following":[458],"sections.":[459],"In":[460],"sec-tion":[461],"2":[462],"describe":[464,479],"basis":[466],"MLP":[469],"PCAapproaches":[473],"hybridPCA-MLP":[482],"proceed":[486],"section":[488,535],"3":[489],"with":[490],"de-scription":[492],"database,":[495],"platform":[497],"experimentsand":[501],"evaluation":[503],"MLP-PCA-based":[507],"recognizerin":[508],"environment":[512],"comparison":[515],"recog-nizer":[519],"MLP-":[523],"PCA-based":[526],"recognizers":[527],"orderto":[529],"evaluate":[530],"its":[531],"performance.":[532],"Finally,":[533],"5":[536],"concludeand":[538],"discuss":[539],"results.2.":[541],"PROPOSED":[542],"ENHANCEMENT":[543],"APPROACH2.1.":[544],"NetworkAs":[547],"mentioned":[548],"above,":[549],"\ufb01rst":[551],"step":[552],"im-prove":[558],"CSRprocess":[563],"caren-vironments":[567],"multilayer":[577],"per-ceptron":[578],"network.":[580],"fact":[582],"speechsignal":[588],"combined":[590],"nonlinear":[593,610],"way":[594],"domainmotivated":[598],"us":[599],"choose":[601],"MLP,":[603],"since":[604],"it":[605],"approximate":[607],"re-quired":[609],"function":[611],"some":[613],"extent":[614],"[6,":[615],"7].":[616],"input":[618],"theMLP":[620],"MFCC":[624],"vector":[625]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
