{"id":"https://openalex.org/W4372266898","doi":"https://doi.org/10.1109/icassp49357.2023.10096166","title":"Leveraging Heteroscedastic Uncertainty in Learning Complex Spectral Mapping for Single-Channel Speech Enhancement","display_name":"Leveraging Heteroscedastic Uncertainty in Learning Complex Spectral Mapping for Single-Channel Speech Enhancement","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372266898","doi":"https://doi.org/10.1109/icassp49357.2023.10096166"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096166","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096166","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100751234","display_name":"Kuan\u2010Lin Chen","orcid":"https://orcid.org/0009-0005-4067-0927"},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]},{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kuan-Lin Chen","raw_affiliation_strings":["Meta Reality Labs Research","Department of Electrical and Computer Engineering, University of California, San Diego"],"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research","institution_ids":["https://openalex.org/I4210128585"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, San Diego","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013369321","display_name":"Daniel Wong","orcid":"https://orcid.org/0000-0001-5069-1557"},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel D. E. Wong","raw_affiliation_strings":["Meta Reality Labs Research"],"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075227645","display_name":"Ke Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ke Tan","raw_affiliation_strings":["Meta Reality Labs Research"],"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040562171","display_name":"Buye Xu","orcid":"https://orcid.org/0000-0002-3027-7567"},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Buye Xu","raw_affiliation_strings":["Meta Reality Labs Research"],"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080751032","display_name":"Anurag Kumar","orcid":"https://orcid.org/0000-0002-1164-144X"},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anurag Kumar","raw_affiliation_strings":["Meta Reality Labs Research"],"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108520440","display_name":"Vamsi Krishna Ithapu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vamsi Krishna Ithapu","raw_affiliation_strings":["Meta Reality Labs Research"],"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research","institution_ids":["https://openalex.org/I4210128585"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100751234"],"corresponding_institution_ids":["https://openalex.org/I36258959","https://openalex.org/I4210128585"],"apc_list":null,"apc_paid":null,"fwci":0.4066,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.54071478,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/undersampling","display_name":"Undersampling","score":0.7238044738769531},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.6493638753890991},{"id":"https://openalex.org/keywords/heteroscedasticity","display_name":"Heteroscedasticity","score":0.6288315057754517},{"id":"https://openalex.org/keywords/covariance","display_name":"Covariance","score":0.5773897171020508},{"id":"https://openalex.org/keywords/mean-squared-error","display_name":"Mean squared error","score":0.5461538434028625},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5012252330780029},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.46306127309799194},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4355141818523407},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.41718152165412903},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3260207772254944},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.29984647035598755},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2728578448295593}],"concepts":[{"id":"https://openalex.org/C136536468","wikidata":"https://www.wikidata.org/wiki/Q1225894","display_name":"Undersampling","level":2,"score":0.7238044738769531},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.6493638753890991},{"id":"https://openalex.org/C101104100","wikidata":"https://www.wikidata.org/wiki/Q1063540","display_name":"Heteroscedasticity","level":2,"score":0.6288315057754517},{"id":"https://openalex.org/C178650346","wikidata":"https://www.wikidata.org/wiki/Q201984","display_name":"Covariance","level":2,"score":0.5773897171020508},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.5461538434028625},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5012252330780029},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.46306127309799194},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4355141818523407},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.41718152165412903},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3260207772254944},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.29984647035598755},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2728578448295593},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096166","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096166","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6600000262260437,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W306230967","https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W1560021816","https://openalex.org/W2044893557","https://openalex.org/W2078528584","https://openalex.org/W2103496339","https://openalex.org/W2137983211","https://openalex.org/W2141998673","https://openalex.org/W2405774341","https://openalex.org/W2600383743","https://openalex.org/W2774389566","https://openalex.org/W2885308148","https://openalex.org/W2889442120","https://openalex.org/W2919310412","https://openalex.org/W2937484199","https://openalex.org/W2962866211","https://openalex.org/W2962990163","https://openalex.org/W2971417062","https://openalex.org/W2985272841","https://openalex.org/W2991361823","https://openalex.org/W3096408984","https://openalex.org/W3100019193","https://openalex.org/W3160085755","https://openalex.org/W3197042120","https://openalex.org/W4221157442","https://openalex.org/W4224920207","https://openalex.org/W4225320295","https://openalex.org/W4283326514","https://openalex.org/W4287083725","https://openalex.org/W4297841766","https://openalex.org/W6631190155","https://openalex.org/W6695533872","https://openalex.org/W6735443497","https://openalex.org/W6768698167","https://openalex.org/W6798726245","https://openalex.org/W6810059299"],"related_works":["https://openalex.org/W2109073422","https://openalex.org/W2887783772","https://openalex.org/W2101754595","https://openalex.org/W4300066510","https://openalex.org/W1971337326","https://openalex.org/W2056958800","https://openalex.org/W4213259725","https://openalex.org/W4311388919","https://openalex.org/W1964490787","https://openalex.org/W2060696366"],"abstract_inverted_index":{"Most":[0],"speech":[1],"enhancement":[2,66],"(SE)":[3],"models":[4],"learn":[5],"a":[6,32,50,57],"point":[7],"estimate":[8],"and":[9,97,123,153],"do":[10],"not":[11],"make":[12],"use":[13],"of":[14,64],"uncertainty":[15,29,94],"estimation":[16],"in":[17],"the":[18,62,65,77,93,133,144],"learning":[19,52],"process.":[20],"In":[21],"this":[22],"paper,":[23],"we":[24,130],"show":[25,131],"that":[26,132],"modeling":[27],"heteroscedastic":[28],"by":[30],"minimizing":[31],"multivariate":[33,114],"Gaussian":[34],"negative":[35],"log-likelihood":[36],"(NLL)":[37],"improves":[38],"SE":[39,85],"performance":[40,137],"at":[41,68],"no":[42],"extra":[43],"cost.":[44],"During":[45],"training,":[46],"our":[47,90],"approach":[48,91],"augments":[49],"model":[51],"complex":[53],"spectral":[54],"mapping":[55],"with":[56,102,110],"temporary":[58],"submodel":[59],"to":[60,73,84,139],"predict":[61],"covariance":[63,78,118],"error":[67,147,151],"each":[69,99],"time-frequency":[70],"bin.":[71],"Due":[72],"unrestricted":[74],"heteroscedas-tic":[75],"uncertainty,":[76,104],"introduces":[79],"an":[80],"undersampling":[81],"effect,":[82],"detrimental":[83],"performance.":[86],"To":[87],"mitigate":[88],"undersampling,":[89],"inflates":[92],"lower":[95],"bound":[96],"weights":[98],"loss":[100,141],"component":[101],"their":[103],"effectively":[105],"compensating":[106],"severely":[107],"undersampled":[108],"components":[109],"more":[111],"penalties.":[112],"Our":[113],"setting":[115],"reveals":[116],"common":[117],"assumptions":[119],"such":[120],"as":[121],"scalar":[122],"diagonal":[124],"matrices.":[125],"By":[126],"weakening":[127],"these":[128],"assumptions,":[129],"NLL":[134],"achieves":[135],"superior":[136],"compared":[138],"popular":[140],"functions":[142],"including":[143],"mean":[145,149],"squared":[146],"(MSE),":[148],"absolute":[150],"(MAE),":[152],"scale-invariant":[154],"signal-to-distortion":[155],"ratio":[156],"(SI-SDR).":[157]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
