{"id":"https://openalex.org/W4392904704","doi":"https://doi.org/10.1109/icassp48485.2024.10447220","title":"Boosting Speech Enhancement with Clean Self-Supervised Features Via Conditional Variational Autoencoders","display_name":"Boosting Speech Enhancement with Clean Self-Supervised Features Via Conditional Variational Autoencoders","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904704","doi":"https://doi.org/10.1109/icassp48485.2024.10447220"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447220","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045637107","display_name":"Yoonhyung Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yoonhyung Lee","raw_affiliation_strings":["Seoul National University,Dept. of Electrical and Computer Engineering,Republic of Korea","Dept. of Electrical and Computer Engineering, Seoul National University, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University,Dept. of Electrical and Computer Engineering,Republic of Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Seoul National University, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077832834","display_name":"Kyomin Jung","orcid":"https://orcid.org/0000-0003-2547-7051"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kyomin Jung","raw_affiliation_strings":["Seoul National University,Dept. of Electrical and Computer Engineering,Republic of Korea","Dept. of Electrical and Computer Engineering, Seoul National University, Republic of Korea","Automation and Systems Research Institute, Seoul National University, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University,Dept. of Electrical and Computer Engineering,Republic of Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Dept. of Electrical and Computer Engineering, Seoul National University, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Automation and Systems Research Institute, Seoul National University, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":null,"apc_paid":null,"fwci":0.2995,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.45889831,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"12396","last_page":"12400"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7689704895019531},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7461361289024353},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.6138913631439209},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.610895574092865},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5384549498558044},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5301839709281921},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.5093377828598022},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.4232070744037628},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.41849708557128906},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.41695547103881836},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.24537310004234314},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.18465721607208252}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7689704895019531},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7461361289024353},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.6138913631439209},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.610895574092865},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5384549498558044},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5301839709281921},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5093377828598022},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.4232070744037628},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.41849708557128906},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41695547103881836},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.24537310004234314},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.18465721607208252}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447220","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1478288009","display_name":null,"funder_award_id":"2021-0-02068","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G1763178188","display_name":null,"funder_award_id":"2021R1A2C2008855","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G2206812781","display_name":null,"funder_award_id":"2021-0-02068","funder_id":"https://openalex.org/F4320321292","funder_display_name":"Seoul National University"},{"id":"https://openalex.org/G4643994530","display_name":null,"funder_award_id":"2021-0-02068","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G5799620071","display_name":null,"funder_award_id":"2021-0-02068","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320321292","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542"},{"id":"https://openalex.org/F4320322065","display_name":"National IT Industry Promotion Agency","ror":"https://ror.org/026v53e29"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W2067295501","https://openalex.org/W2144404214","https://openalex.org/W2587284713","https://openalex.org/W2593414223","https://openalex.org/W2924334974","https://openalex.org/W3036601975","https://openalex.org/W3041956526","https://openalex.org/W3097777922","https://openalex.org/W3197284240","https://openalex.org/W3197729725","https://openalex.org/W3205533980","https://openalex.org/W3209059054","https://openalex.org/W4212774754","https://openalex.org/W4221166168","https://openalex.org/W4226403810","https://openalex.org/W4281779489","https://openalex.org/W4295177495","https://openalex.org/W4297841357","https://openalex.org/W4297841651","https://openalex.org/W4301371414","https://openalex.org/W4372341094","https://openalex.org/W4372346318","https://openalex.org/W6733471323","https://openalex.org/W6757817989","https://openalex.org/W6763486065","https://openalex.org/W6780218876","https://openalex.org/W6780593937","https://openalex.org/W6795952400","https://openalex.org/W6803547063","https://openalex.org/W6838276489","https://openalex.org/W6838843145"],"related_works":["https://openalex.org/W2120771489","https://openalex.org/W1546240199","https://openalex.org/W2051376034","https://openalex.org/W3211091508","https://openalex.org/W2955597484","https://openalex.org/W3110551121","https://openalex.org/W193702574","https://openalex.org/W72660888","https://openalex.org/W2089240210","https://openalex.org/W2063862874"],"abstract_inverted_index":{"Recently,":[0],"Self-Supervised":[1],"Features":[2],"(SSF)":[3],"trained":[4],"on":[5],"extensive":[6],"speech":[7,16,53],"datasets":[8],"have":[9],"shown":[10],"significant":[11],"performance":[12,139],"gains":[13],"across":[14,108],"various":[15],"processing":[17],"tasks.":[18],"Nevertheless,":[19],"their":[20],"effectiveness":[21,120],"in":[22,86,143],"Speech":[23],"Enhancement":[24],"(SE)":[25],"systems":[26],"is":[27],"often":[28],"suboptimal":[29],"due":[30],"to":[31,80,117,136],"insufficient":[32],"optimization":[33],"for":[34,54,64],"noisy":[35],"environments.":[36],"To":[37],"address":[38],"this":[39],"issue,":[40],"we":[41,59,76,113],"present":[42],"a":[43],"novel":[44],"methodology":[45],"that":[46,105],"directly":[47],"utilizes":[48],"SSFs":[49,63,89,107,125],"extracted":[50],"from":[51,140],"clean":[52,62,88,124],"enhancing":[55],"SE":[56,144],"models.":[57],"Specifically,":[58],"leverage":[60,82],"the":[61,69,83,87,91,119,127,132],"latent":[65],"space":[66],"modeling":[67],"within":[68,126],"Conditional":[70],"Variational":[71],"Autoencoder":[72],"(CVAE)":[73],"framework.":[74],"Consequently,":[75],"enable":[77],"our":[78,97,141],"model":[79],"fully":[81],"knowledge":[84],"existing":[85,103],"without":[90],"interference":[92],"of":[93,121],"noise.":[94],"In":[95],"experiments,":[96],"approach":[98,142],"yields":[99],"clear":[100],"improvements":[101],"over":[102],"methods":[104],"use":[106],"six":[109],"evaluation":[110],"metrics.":[111],"Furthermore,":[112],"provide":[114],"comprehensive":[115],"analyses":[116],"validate":[118],"1)":[122],"incorporating":[123],"CVAE":[128],"framework":[129],"and":[130,147],"2)":[131],"training":[133],"techniques":[134],"used":[135],"achieve":[137],"optimal":[138],"systems.":[145],"Code":[146],"audio":[148],"samples":[149],"are":[150],"available":[151],"at":[152],"https://github.com/YoonhyungLee94/SSFCVAE":[153]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
