{"id":"https://openalex.org/W4387934967","doi":"https://doi.org/10.1109/taslp.2023.3321975","title":"A Two-Stage Deep Representation Learning-Based Speech Enhancement Method Using Variational Autoencoder and Adversarial Training","display_name":"A Two-Stage Deep Representation Learning-Based Speech Enhancement Method Using Variational Autoencoder and Adversarial Training","publication_year":2023,"publication_date":"2023-10-25","ids":{"openalex":"https://openalex.org/W4387934967","doi":"https://doi.org/10.1109/taslp.2023.3321975"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3321975","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3321975","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2211.09166","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029253113","display_name":"Yang Xiang","orcid":"https://orcid.org/0000-0002-7120-5842"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":true,"raw_author_name":"Yang Xiang","raw_affiliation_strings":["Audio Analysis Lab, CREATE, Aalborg University, Aalborg, Denmark","Capturi A/S, Aarhus, Denmark"],"affiliations":[{"raw_affiliation_string":"Audio Analysis Lab, CREATE, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]},{"raw_affiliation_string":"Capturi A/S, Aarhus, Denmark","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063946978","display_name":"Jesper Lisby H\u00f8jvang","orcid":"https://orcid.org/0009-0002-6278-9353"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jesper Lisby H\u00f8jvang","raw_affiliation_strings":["Capturi A/S, Aarhus, Denmark"],"affiliations":[{"raw_affiliation_string":"Capturi A/S, Aarhus, Denmark","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054964567","display_name":"Morten H\u00f8jfeldt Rasmussen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Morten H\u00f8jfeldt Rasmussen","raw_affiliation_strings":["Capturi A/S, Aarhus, Denmark"],"affiliations":[{"raw_affiliation_string":"Capturi A/S, Aarhus, Denmark","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026658144","display_name":"Mads Gr\u00e6sb\u00f8ll Christensen","orcid":"https://orcid.org/0000-0003-3586-7969"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Mads Gr\u00e6sb\u00f8ll Christensen","raw_affiliation_strings":["Audio Analysis Lab, CREATE, Aalborg University, Aalborg, Denmark"],"affiliations":[{"raw_affiliation_string":"Audio Analysis Lab, CREATE, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5029253113"],"corresponding_institution_ids":["https://openalex.org/I891191580"],"apc_list":null,"apc_paid":null,"fwci":1.4006,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.82683017,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"32","issue":null,"first_page":"164","last_page":"177"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.9672822952270508},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7943090796470642},{"id":"https://openalex.org/keywords/stage","display_name":"Stage (stratigraphy)","score":0.6340094804763794},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6227313280105591},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6198277473449707},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5687423944473267},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5660580992698669},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.550351083278656},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.475736141204834},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46184295415878296},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40422767400741577},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3578488826751709},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07608270645141602},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.0718294084072113}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.9672822952270508},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7943090796470642},{"id":"https://openalex.org/C146357865","wikidata":"https://www.wikidata.org/wiki/Q1123245","display_name":"Stage (stratigraphy)","level":2,"score":0.6340094804763794},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6227313280105591},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6198277473449707},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5687423944473267},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5660580992698669},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.550351083278656},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.475736141204834},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46184295415878296},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40422767400741577},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3578488826751709},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07608270645141602},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0718294084072113},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2023.3321975","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3321975","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:publications/cfed461e-1993-4d39-be77-0be98e7d70d0","is_oa":true,"landing_page_url":"https://vbn.aau.dk/da/publications/cfed461e-1993-4d39-be77-0be98e7d70d0","pdf_url":"https://arxiv.org/pdf/2211.09166","source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Xiang, Y, H\u00f8jvang, J L, Rasmussen, M H & Christensen, M G 2024, 'A Two-Stage Deep Representation Learning-Based Speech Enhancement Method Using Variational Autoencoder and Adversarial Training', IEEE/ACM Transactions on Audio Speech and Language Processing, vol. 32, pp. 164-177. https://doi.org/10.1109/TASLP.2023.3321975","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.atira.dk:publications/cfed461e-1993-4d39-be77-0be98e7d70d0","is_oa":true,"landing_page_url":"https://vbn.aau.dk/da/publications/cfed461e-1993-4d39-be77-0be98e7d70d0","pdf_url":"https://arxiv.org/pdf/2211.09166","source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Xiang, Y, H\u00f8jvang, J L, Rasmussen, M H & Christensen, M G 2024, 'A Two-Stage Deep Representation Learning-Based Speech Enhancement Method Using Variational Autoencoder and Adversarial Training', IEEE/ACM Transactions on Audio Speech and Language Processing, vol. 32, pp. 164-177. https://doi.org/10.1109/TASLP.2023.3321975","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G3106244815","display_name":null,"funder_award_id":"9065-0004","funder_id":"https://openalex.org/F4320313796","funder_display_name":"Innovationsfonden"},{"id":"https://openalex.org/G3837023311","display_name":null,"funder_award_id":"9065-00046","funder_id":"https://openalex.org/F4320313796","funder_display_name":"Innovationsfonden"}],"funders":[{"id":"https://openalex.org/F4320313796","display_name":"Innovationsfonden","ror":"https://ror.org/00daj4111"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387934967.pdf"},"referenced_works_count":109,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W1790748249","https://openalex.org/W1901129140","https://openalex.org/W1959608418","https://openalex.org/W1974387177","https://openalex.org/W1992475611","https://openalex.org/W2013608223","https://openalex.org/W2025515167","https://openalex.org/W2031647436","https://openalex.org/W2044893557","https://openalex.org/W2069681747","https://openalex.org/W2078528584","https://openalex.org/W2095072097","https://openalex.org/W2125630648","https://openalex.org/W2128604088","https://openalex.org/W2141998673","https://openalex.org/W2142148616","https://openalex.org/W2146502635","https://openalex.org/W2157331557","https://openalex.org/W2159202424","https://openalex.org/W2163922914","https://openalex.org/W2190061941","https://openalex.org/W2222244689","https://openalex.org/W2291877678","https://openalex.org/W2419501139","https://openalex.org/W2593414223","https://openalex.org/W2605589342","https://openalex.org/W2746457594","https://openalex.org/W2753738274","https://openalex.org/W2758785877","https://openalex.org/W2766672686","https://openalex.org/W2796704765","https://openalex.org/W2802220650","https://openalex.org/W2883322837","https://openalex.org/W2900450731","https://openalex.org/W2901552243","https://openalex.org/W2949756029","https://openalex.org/W2950378732","https://openalex.org/W2952218014","https://openalex.org/W2962850167","https://openalex.org/W2962866211","https://openalex.org/W2963045393","https://openalex.org/W2963341071","https://openalex.org/W2963364041","https://openalex.org/W2963453742","https://openalex.org/W2963490371","https://openalex.org/W2963828919","https://openalex.org/W2964167449","https://openalex.org/W2965121295","https://openalex.org/W2970006822","https://openalex.org/W2972592847","https://openalex.org/W2973133192","https://openalex.org/W3016120385","https://openalex.org/W3031135612","https://openalex.org/W3033915435","https://openalex.org/W3092028330","https://openalex.org/W3092864146","https://openalex.org/W3096408984","https://openalex.org/W3096900631","https://openalex.org/W3099330747","https://openalex.org/W3124794156","https://openalex.org/W3130335839","https://openalex.org/W3131332223","https://openalex.org/W3136499730","https://openalex.org/W3160563341","https://openalex.org/W3160567113","https://openalex.org/W3161140524","https://openalex.org/W3161480375","https://openalex.org/W3165462110","https://openalex.org/W3169905056","https://openalex.org/W3176823897","https://openalex.org/W3194338569","https://openalex.org/W3195288392","https://openalex.org/W3197042120","https://openalex.org/W3200754814","https://openalex.org/W3201698955","https://openalex.org/W3207340675","https://openalex.org/W3207551191","https://openalex.org/W4220862273","https://openalex.org/W4221135977","https://openalex.org/W4225263883","https://openalex.org/W4225302959","https://openalex.org/W4225905067","https://openalex.org/W4253928870","https://openalex.org/W4295312788","https://openalex.org/W4297841603","https://openalex.org/W4319585899","https://openalex.org/W4320013936","https://openalex.org/W6631190155","https://openalex.org/W6639824700","https://openalex.org/W6640963894","https://openalex.org/W6679009796","https://openalex.org/W6681435938","https://openalex.org/W6687506355","https://openalex.org/W6717434760","https://openalex.org/W6738884980","https://openalex.org/W6744627333","https://openalex.org/W6745117592","https://openalex.org/W6750852989","https://openalex.org/W6753113894","https://openalex.org/W6757632829","https://openalex.org/W6766978945","https://openalex.org/W6767111847","https://openalex.org/W6783867762","https://openalex.org/W6785764544","https://openalex.org/W6796386619","https://openalex.org/W6796464841"],"related_works":["https://openalex.org/W2669956259","https://openalex.org/W4249005693","https://openalex.org/W4392946183","https://openalex.org/W4405887298","https://openalex.org/W2983142544","https://openalex.org/W2891059443","https://openalex.org/W4281663961","https://openalex.org/W3208888551","https://openalex.org/W4313561566","https://openalex.org/W3208386644"],"abstract_inverted_index":{"This":[0],"article":[1],"focuses":[2],"on":[3,26],"leveraging":[4],"deep":[5,19],"representation":[6],"learning":[7,28],"(DRL)":[8],"for":[9,188],"speech":[10,96,226],"enhancement":[11],"(SE).":[12],"In":[13,61],"general,":[14],"the":[15,18,27,33,62,84,95,105,110,143,147,153,159,168,171,179,189,208,236],"performance":[16,199],"of":[17,29,146,155,170,225],"neural":[20],"network":[21],"(DNN)":[22],"is":[23,36,128],"heavily":[24],"dependent":[25],"data":[30],"representation.":[31],"However,":[32,108],"DRL's":[34],"importance":[35],"often":[37],"ignored":[38],"in":[39,158],"many":[40],"DNN-based":[41,214],"SE":[42,56,198,215,244],"algorithms.":[43,245],"To":[44,150],"obtain":[45,94],"a":[46,53,77,121,195],"higher":[47,218],"quality":[48,154,227],"enhanced":[49,79,156],"speech,":[50,157],"we":[51,65,82,117,162],"propose":[52],"two-stage":[54],"DRL-based":[55],"method":[57],"through":[58],"adversarial":[59,164],"training.":[60],"first":[63],"stage,":[64,161],"disentangle":[66],"different":[67],"latent":[68],"variables":[69],"because":[70],"disentangled":[71],"representations":[72,103,113],"can":[73,118,200,211,239],"help":[74,212],"DNN":[75],"generate":[76],"better":[78,197],"speech.":[80],"Specifically,":[81],"use":[83],"<inline-formula":[85],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[86],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[87],"notation=\"LaTeX\">$\\beta$</tex-math></inline-formula>":[88],"-variational":[89],"autoencoder":[90],"(VAE)":[91],"algorithm":[92,185,238],"to":[93,124,130,166],"and":[97,101,112,116,177,229],"noise":[98],"posterior":[99,173,192],"estimations":[100,134],"related":[102],"from":[104],"observed":[106],"signal.":[107],"since":[109],"posteriors":[111],"are":[114,135],"intractable":[115],"only":[119],"apply":[120],"conditional":[122],"assumption":[123],"estimate":[125],"them,":[126],"it":[127],"difficult":[129],"ensure":[131],"that":[132,207],"these":[133],"always":[136],"pretty":[137],"accurate,":[138],"which":[139],"may":[140],"potentially":[141,190],"degrade":[142],"final":[144],"accuracy":[145],"signal":[148,175,180],"estimation.":[149],"further":[151],"improve":[152,178],"second":[160],"introduce":[163],"training":[165],"reduce":[167],"effect":[169],"inaccurate":[172,191],"towards":[174],"reconstruction":[176],"estimation":[181],"accuracy,":[182],"making":[183],"our":[184],"more":[186],"robust":[187],"estimations.":[193],"As":[194],"result,":[196],"be":[201],"achieved.":[202],"The":[203],"experimental":[204],"results":[205],"indicate":[206],"proposed":[209,237],"strategy":[210],"similar":[213],"algorithms":[216],"achieve":[217],"short-time":[219],"objective":[220],"intelligibility":[221],"(STOI),":[222],"perceptual":[223],"evaluation":[224],"(PESQ),":[228],"scale-invariant":[230],"signal-to-distortion":[231],"ratio":[232],"(SI-SDR)":[233],"scores.":[234],"Moreover,":[235],"also":[240],"outperform":[241],"recent":[242],"competitive":[243]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
