{"id":"https://openalex.org/W4386764234","doi":"https://doi.org/10.1109/waspaa58266.2023.10248067","title":"Single Channel Speech Presence Probability Estimation based on Hybrid Global-Local Information","display_name":"Single Channel Speech Presence Probability Estimation based on Hybrid Global-Local Information","publication_year":2023,"publication_date":"2023-09-15","ids":{"openalex":"https://openalex.org/W4386764234","doi":"https://doi.org/10.1109/waspaa58266.2023.10248067"},"language":"en","primary_location":{"id":"doi:10.1109/waspaa58266.2023.10248067","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/waspaa58266.2023.10248067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007499353","display_name":"Shuai Tao","orcid":"https://orcid.org/0000-0003-2223-0205"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Shuai Tao","raw_affiliation_strings":["Aalborg University,Audio Analysis Lab,Aalborg,Denmark","Audio Analysis Lab, Aalborg University, Aalborg, Denmark"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Aalborg University,Audio Analysis Lab,Aalborg,Denmark","institution_ids":["https://openalex.org/I891191580"]},{"raw_affiliation_string":"Audio Analysis Lab, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029253113","display_name":"Yang Xiang","orcid":"https://orcid.org/0000-0002-7120-5842"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Yang Xiang","raw_affiliation_strings":["Aalborg University,Audio Analysis Lab,Aalborg,Denmark","Audio Analysis Lab, Aalborg University, Aalborg, Denmark"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Aalborg University,Audio Analysis Lab,Aalborg,Denmark","institution_ids":["https://openalex.org/I891191580"]},{"raw_affiliation_string":"Audio Analysis Lab, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078791449","display_name":"Himavanth Reddy","orcid":null},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Himavanth Reddy","raw_affiliation_strings":["Aalborg University,Audio Analysis Lab,Aalborg,Denmark","Audio Analysis Lab, Aalborg University, Aalborg, Denmark"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Aalborg University,Audio Analysis Lab,Aalborg,Denmark","institution_ids":["https://openalex.org/I891191580"]},{"raw_affiliation_string":"Audio Analysis Lab, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041263382","display_name":"Jesper Rindom Jensen","orcid":"https://orcid.org/0000-0001-6023-8270"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Jesper Rindom Jensen","raw_affiliation_strings":["Aalborg University,Audio Analysis Lab,Aalborg,Denmark","Audio Analysis Lab, Aalborg University, Aalborg, Denmark"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Aalborg University,Audio Analysis Lab,Aalborg,Denmark","institution_ids":["https://openalex.org/I891191580"]},{"raw_affiliation_string":"Audio Analysis Lab, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026658144","display_name":"Mads Gr\u00e6sb\u00f8ll Christensen","orcid":"https://orcid.org/0000-0003-3586-7969"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Mads Gr\u00e6sb\u00f8ll Christensen","raw_affiliation_strings":["Aalborg University,Audio Analysis Lab,Aalborg,Denmark","Audio Analysis Lab, Aalborg University, Aalborg, Denmark"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Aalborg University,Audio Analysis Lab,Aalborg,Denmark","institution_ids":["https://openalex.org/I891191580"]},{"raw_affiliation_string":"Audio Analysis Lab, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1848,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.43407206,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.7724125385284424},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7540392875671387},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.5759420990943909},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5444714426994324},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5238870978355408},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.45677298307418823},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.45273086428642273},{"id":"https://openalex.org/keywords/bin","display_name":"Bin","score":0.4424261748790741},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4278654456138611},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3812134861946106},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37530460953712463},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.16320368647575378},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1596408188343048},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.09280738234519958},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09245938062667847},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07609176635742188}],"concepts":[{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.7724125385284424},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7540392875671387},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5759420990943909},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5444714426994324},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5238870978355408},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.45677298307418823},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.45273086428642273},{"id":"https://openalex.org/C156273044","wikidata":"https://www.wikidata.org/wiki/Q4913766","display_name":"Bin","level":2,"score":0.4424261748790741},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4278654456138611},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3812134861946106},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37530460953712463},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.16320368647575378},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1596408188343048},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.09280738234519958},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09245938062667847},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07609176635742188},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/waspaa58266.2023.10248067","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/waspaa58266.2023.10248067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/b7d073df-1198-44d1-99a4-04db6496cb58","is_oa":false,"landing_page_url":"http://www.scopus.com/inward/record.url?scp=85173013469&partnerID=8YFLogxK","pdf_url":null,"source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Tao , S , Xiang , Y , Reddy , H , Jensen , J R &amp; Christensen , M G 2023 , Single Channel Speech Presence Probability Estimation based on Hybrid Global-Local Information . in Proceedings of the 2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics, WASPAA 2023 . , 10248067 , IEEE , IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA) , pp. 1-5 , 2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA) , 22/10/2023 . https://doi.org/10.1109/WASPAA58266.2023.10248067","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.atira.dk:publications/b7d073df-1198-44d1-99a4-04db6496cb58","is_oa":false,"landing_page_url":"https://vbn.aau.dk/da/publications/b7d073df-1198-44d1-99a4-04db6496cb58","pdf_url":null,"source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Tao, S, Xiang, Y, Reddy, H, Jensen, J R & Christensen, M G 2023, Single Channel Speech Presence Probability Estimation based on Hybrid Global-Local Information. in Proceedings of the 2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics, WASPAA 2023., 10248067, IEEE (Institute of Electrical and Electronics Engineers), IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), pp. 1-5, 2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), 22/10/2023. https://doi.org/10.1109/WASPAA58266.2023.10248067","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W196214544","https://openalex.org/W1522301498","https://openalex.org/W2025188156","https://openalex.org/W2033875152","https://openalex.org/W2040909219","https://openalex.org/W2051428568","https://openalex.org/W2108819501","https://openalex.org/W2118354544","https://openalex.org/W2121894251","https://openalex.org/W2126942983","https://openalex.org/W2127292597","https://openalex.org/W2146324387","https://openalex.org/W2155998647","https://openalex.org/W2158336491","https://openalex.org/W2169147844","https://openalex.org/W2398042854","https://openalex.org/W2593116425","https://openalex.org/W2781626870","https://openalex.org/W2890075319","https://openalex.org/W2972592847","https://openalex.org/W2976594877","https://openalex.org/W3016267250","https://openalex.org/W3017350693","https://openalex.org/W3109458322","https://openalex.org/W4225905067","https://openalex.org/W4375868852","https://openalex.org/W6607974698","https://openalex.org/W6631190155","https://openalex.org/W6747417605"],"related_works":["https://openalex.org/W2107701374","https://openalex.org/W1616588898","https://openalex.org/W4395000504","https://openalex.org/W4249504934","https://openalex.org/W2183416055","https://openalex.org/W2568867011","https://openalex.org/W1994114538","https://openalex.org/W2413205705","https://openalex.org/W2735644334","https://openalex.org/W138879321"],"abstract_inverted_index":{"Speech":[0],"presence":[1],"probability":[2,15],"(SPP)":[3],"estimators":[4,31],"work":[5],"in":[6,43,172],"the":[7,58,123,128,139,143],"short-time":[8],"Fourier":[9],"transform":[10],"domain":[11],"to":[12,70,89,106,126,137,157],"give":[13],"a":[14,34,55,59,93,112,153],"estimate":[16,90,158],"of":[17],"whether":[18],"speech":[19,44,48,87],"is":[20,104,135],"present":[21],"or":[22],"absent":[23],"at":[24],"each":[25,119],"time-frequency":[26],"bin.":[27],"Most":[28],"existing":[29,71],"SPP":[30,36,61,77,91,133,164],"have":[32],"achieved":[33],"high":[35,163],"detection":[37],"accuracy":[38,166],"and":[39,46,116,122],"are":[40],"deployed":[41],"successfully":[42],"enhancement":[45],"automatic":[47],"recognition.":[49],"In":[50,68],"this":[51],"work,":[52],"we":[53],"propose":[54],"single":[56],"channel":[57],"posteriori":[60],"estimator":[62,81],"based":[63,76],"on":[64],"hybrid":[65,97,129,140],"global-local":[66,98],"information.":[67,130],"contrast":[69],"deep":[72],"neural":[73],"networks":[74],"(DNNs)":[75],"estimation":[78,165],"approaches,":[79],"our":[80,149],"DNN":[82],"can":[83,161],"effectively":[84],"extract":[85,107],"helpful":[86],"representations":[88],"with":[92,167],"simpler":[94],"architecture.":[95],"Taking":[96],"information":[99,110,141],"into":[100,111,142],"account,":[101],"an":[102,132],"encoder":[103],"designed":[105],"high-dimensional":[108],"global":[109],"low-dimensional":[113],"latent":[114,124],"space":[115,125],"then":[117],"concatenate":[118],"frequency":[120],"bin":[121],"generate":[127],"Finally,":[131],"decoder":[134],"used":[136],"decode":[138],"SPP.":[144],"Experimental":[145],"results":[146],"demonstrate":[147],"that":[148],"proposed":[150],"method":[151],"provides":[152],"more":[154],"effective":[155],"way":[156],"SPP,":[159],"which":[160],"achieve":[162],"low":[168,173],"computational":[169],"complexity,":[170],"especially":[171],"signal-to-noise":[174],"ratio":[175],"conditions.":[176]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
