{"id":"https://openalex.org/W3115527998","doi":"https://doi.org/10.1109/mmsp48831.2020.9287142","title":"A Low Complexity Long Short-Term Memory Based Voice Activity Detection","display_name":"A Low Complexity Long Short-Term Memory Based Voice Activity Detection","publication_year":2020,"publication_date":"2020-09-21","ids":{"openalex":"https://openalex.org/W3115527998","doi":"https://doi.org/10.1109/mmsp48831.2020.9287142","mag":"3115527998"},"language":"en","primary_location":{"id":"doi:10.1109/mmsp48831.2020.9287142","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp48831.2020.9287142","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE 22nd International Workshop on Multimedia Signal Processing (MMSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101914357","display_name":"Ruiting Yang","orcid":"https://orcid.org/0000-0002-9634-6238"},"institutions":[{"id":"https://openalex.org/I4210109220","display_name":"Harman (China)","ror":"https://ror.org/01k2z4866","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778","https://openalex.org/I4210109220","https://openalex.org/I4210131281"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruiting Yang","raw_affiliation_strings":["Harman International, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harman International, Shenzhen, China","institution_ids":["https://openalex.org/I4210109220"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100454195","display_name":"Jie Liu","orcid":"https://orcid.org/0000-0003-1983-5086"},"institutions":[{"id":"https://openalex.org/I4210109220","display_name":"Harman (China)","ror":"https://ror.org/01k2z4866","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778","https://openalex.org/I4210109220","https://openalex.org/I4210131281"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Liu","raw_affiliation_strings":["Harman International, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harman International, Shenzhen, China","institution_ids":["https://openalex.org/I4210109220"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043879150","display_name":"Xiang Deng","orcid":"https://orcid.org/0000-0002-5957-1866"},"institutions":[{"id":"https://openalex.org/I4210109220","display_name":"Harman (China)","ror":"https://ror.org/01k2z4866","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778","https://openalex.org/I4210109220","https://openalex.org/I4210131281"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Deng","raw_affiliation_strings":["Harman International, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harman International, Shenzhen, China","institution_ids":["https://openalex.org/I4210109220"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058660837","display_name":"Zhuochao Zheng","orcid":"https://orcid.org/0000-0003-1842-762X"},"institutions":[{"id":"https://openalex.org/I4210109220","display_name":"Harman (China)","ror":"https://ror.org/01k2z4866","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778","https://openalex.org/I4210109220","https://openalex.org/I4210131281"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuochao Zheng","raw_affiliation_strings":["Harman International, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harman International, Shenzhen, China","institution_ids":["https://openalex.org/I4210109220"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101914357"],"corresponding_institution_ids":["https://openalex.org/I4210109220"],"apc_list":null,"apc_paid":null,"fwci":0.3031,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.56359447,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8305664658546448},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.813427209854126},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.7738372087478638},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7139440774917603},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.6802910566329956},{"id":"https://openalex.org/keywords/long-short-term-memory","display_name":"Long short term memory","score":0.6177529692649841},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.572177529335022},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.5626857876777649},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.5149374604225159},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.4817885756492615},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4764174222946167},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.45799002051353455},{"id":"https://openalex.org/keywords/linear-predictive-coding","display_name":"Linear predictive coding","score":0.45639297366142273},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.4455571174621582},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.43972691893577576},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43763643503189087},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.4355742037296295},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4318747818470001},{"id":"https://openalex.org/keywords/signal-processing","display_name":"Signal processing","score":0.41667795181274414},{"id":"https://openalex.org/keywords/background-noise","display_name":"Background noise","score":0.41075512766838074},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.3470364511013031},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.2440764605998993},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.17212116718292236},{"id":"https://openalex.org/keywords/digital-signal-processing","display_name":"Digital signal processing","score":0.15014705061912537}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8305664658546448},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.813427209854126},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.7738372087478638},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7139440774917603},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.6802910566329956},{"id":"https://openalex.org/C133488467","wikidata":"https://www.wikidata.org/wiki/Q6673524","display_name":"Long short term memory","level":4,"score":0.6177529692649841},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.572177529335022},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.5626857876777649},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5149374604225159},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.4817885756492615},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4764174222946167},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.45799002051353455},{"id":"https://openalex.org/C59883199","wikidata":"https://www.wikidata.org/wiki/Q1826438","display_name":"Linear predictive coding","level":3,"score":0.45639297366142273},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4455571174621582},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.43972691893577576},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43763643503189087},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.4355742037296295},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4318747818470001},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.41667795181274414},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.41075512766838074},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3470364511013031},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.2440764605998993},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.17212116718292236},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.15014705061912537},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mmsp48831.2020.9287142","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp48831.2020.9287142","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE 22nd International Workshop on Multimedia Signal Processing (MMSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W36345725","https://openalex.org/W55930003","https://openalex.org/W296599067","https://openalex.org/W585519466","https://openalex.org/W891534129","https://openalex.org/W1483830027","https://openalex.org/W1494198834","https://openalex.org/W1573166544","https://openalex.org/W1978274651","https://openalex.org/W1999454387","https://openalex.org/W2007119422","https://openalex.org/W2023582935","https://openalex.org/W2032474878","https://openalex.org/W2048497537","https://openalex.org/W2050758723","https://openalex.org/W2059203007","https://openalex.org/W2064675550","https://openalex.org/W2079735306","https://openalex.org/W2097036989","https://openalex.org/W2103749544","https://openalex.org/W2115717467","https://openalex.org/W2128838222","https://openalex.org/W2129120544","https://openalex.org/W2149053750","https://openalex.org/W2156038773","https://openalex.org/W2157363431","https://openalex.org/W2170644918","https://openalex.org/W2171875099","https://openalex.org/W2240641835","https://openalex.org/W2528063314","https://openalex.org/W2550397165","https://openalex.org/W2618825466","https://openalex.org/W2892300106","https://openalex.org/W2900030058","https://openalex.org/W2917987043","https://openalex.org/W2962866211","https://openalex.org/W2980286501","https://openalex.org/W3147539069","https://openalex.org/W4250499635","https://openalex.org/W6602294394","https://openalex.org/W6624013145","https://openalex.org/W6738603831","https://openalex.org/W6768798221"],"related_works":["https://openalex.org/W2049648127","https://openalex.org/W1852231985","https://openalex.org/W2080325429","https://openalex.org/W2184306570","https://openalex.org/W2351647310","https://openalex.org/W2140099343","https://openalex.org/W2115277869","https://openalex.org/W1572861854","https://openalex.org/W2341426843","https://openalex.org/W1984921740"],"abstract_inverted_index":{"Voice":[0],"Activity":[1],"Detection":[2],"(VAD)":[3],"plays":[4],"an":[5,32],"important":[6],"role":[7],"in":[8,74,89,132],"audio":[9],"processing,":[10],"but":[11],"it":[12,69],"is":[13,22,47,107],"also":[14],"a":[15,19,39],"common":[16],"challenge":[17],"when":[18],"voice":[20],"signal":[21],"corrupted":[23],"with":[24],"strong":[25],"and":[26,34,58,78,83,96,130],"transient":[27],"noise.":[28],"In":[29],"this":[30],"paper,":[31],"accurate":[33],"causal":[35],"VAD":[36,106],"module":[37],"using":[38],"long":[40],"short-term":[41],"memory":[42],"(LSTM)":[43],"deep":[44],"neural":[45],"network":[46],"proposed.":[48],"A":[49],"set":[50],"of":[51,92,115],"features":[52,61],"including":[53,123],"Gammatone":[54],"cepstral":[55],"coefficients":[56],"(GTCC)":[57],"selected":[59],"spectral":[60],"are":[62,135],"used.":[63],"The":[64],"low":[65],"complex":[66],"structure":[67],"allows":[68],"can":[70],"be":[71],"easily":[72],"implemented":[73],"speech":[75,93,111,128],"processing":[76],"algorithms":[77],"applications.":[79],"With":[80],"carefully":[81],"pre-processing":[82],"labeling":[84],"the":[85,90,99,104],"collected":[86],"training":[87,97],"data":[88],"classes":[91],"or":[94],"non-speech":[95],"on":[98],"LSTM":[100],"net,":[101],"experiments":[102],"show":[103],"proposed":[105],"able":[108],"to":[109],"distinguish":[110],"from":[112],"different":[113,133],"types":[114],"noisy":[116],"background":[117],"effectively.":[118],"Its":[119],"robustness":[120],"against":[121],"changes":[122],"varying":[124],"frame":[125],"length,":[126],"moving":[127],"sources":[129],"speaking":[131],"languages,":[134],"further":[136],"investigated.":[137]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-25T21:42:39.735039","created_date":"2025-10-10T00:00:00"}
