{"id":"https://openalex.org/W4225682608","doi":"https://doi.org/10.1109/asru51503.2021.9688031","title":"PL-EESR: Perceptual Loss Based End-to-End Robust Speaker Representation Extraction","display_name":"PL-EESR: Perceptual Loss Based End-to-End Robust Speaker Representation Extraction","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4225682608","doi":"https://doi.org/10.1109/asru51503.2021.9688031"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9688031","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688031","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116252196","display_name":"Yi Ma","orcid":"https://orcid.org/0009-0008-5473-5498"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Yi Ma","raw_affiliation_strings":["National University of Singapore,Department of Electrical and Computer Engineering,Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore,Department of Electrical and Computer Engineering,Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004287909","display_name":"Kong Aik Lee","orcid":"https://orcid.org/0000-0001-9133-3000"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Kong Aik Lee","raw_affiliation_strings":["Institute for Infocomm Research, A&#x002A;STAR,Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, A&#x002A;STAR,Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037259225","display_name":"Ville Hautam\u00e4ki","orcid":"https://orcid.org/0000-0002-5885-0003"},"institutions":[{"id":"https://openalex.org/I175532246","display_name":"University of Eastern Finland","ror":"https://ror.org/00cyydd11","country_code":"FI","type":"education","lineage":["https://openalex.org/I175532246"]},{"id":"https://openalex.org/I4210107997","display_name":"Finland University","ror":"https://ror.org/014rks409","country_code":"FI","type":"education","lineage":["https://openalex.org/I4210107997"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Ville Hautamaki","raw_affiliation_strings":["School of Computing, University of Eastern Finland,Finland","School of Computing, University of Eastern Finland, Finland"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Eastern Finland,Finland","institution_ids":["https://openalex.org/I4210107997","https://openalex.org/I175532246"]},{"raw_affiliation_string":"School of Computing, University of Eastern Finland, Finland","institution_ids":["https://openalex.org/I4210107997","https://openalex.org/I175532246"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["National University of Singapore,Department of Electrical and Computer Engineering,Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore,Department of Electrical and Computer Engineering,Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5116252196"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":1.2042,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.86836803,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"106","last_page":"113"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7927762269973755},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7825053930282593},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.5678977370262146},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5668689608573914},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5011892318725586},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.44018301367759705},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.43617427349090576},{"id":"https://openalex.org/keywords/speaker-identification","display_name":"Speaker identification","score":0.4276455342769623},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.42643728852272034},{"id":"https://openalex.org/keywords/noise-measurement","display_name":"Noise measurement","score":0.42268896102905273},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.41584882140159607},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4065389037132263},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4050159454345703},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.27398359775543213}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7927762269973755},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7825053930282593},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.5678977370262146},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5668689608573914},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5011892318725586},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.44018301367759705},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.43617427349090576},{"id":"https://openalex.org/C2986627078","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker identification","level":3,"score":0.4276455342769623},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.42643728852272034},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.42268896102905273},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.41584882140159607},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4065389037132263},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4050159454345703},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.27398359775543213},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9688031","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688031","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8582484108","display_name":null,"funder_award_id":"192 25 00054,A18A2b0046,FS-2021-001","funder_id":"https://openalex.org/F4320320696","funder_display_name":"Agency for Science, Technology and Research"}],"funders":[{"id":"https://openalex.org/F4320320696","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09"},{"id":"https://openalex.org/F4320334971","display_name":"Science and Engineering Research Council","ror":"https://ror.org/00zgdb249"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W46095808","https://openalex.org/W111477576","https://openalex.org/W1524333225","https://openalex.org/W2046056978","https://openalex.org/W2069681747","https://openalex.org/W2087126002","https://openalex.org/W2136879537","https://openalex.org/W2147147599","https://openalex.org/W2219249508","https://openalex.org/W2291877678","https://openalex.org/W2331128040","https://openalex.org/W2516764878","https://openalex.org/W2696967604","https://openalex.org/W2808631503","https://openalex.org/W2890964092","https://openalex.org/W2972425344","https://openalex.org/W2982037672","https://openalex.org/W3024301174","https://openalex.org/W3025783616","https://openalex.org/W3049430014","https://openalex.org/W3094911989","https://openalex.org/W3095134095","https://openalex.org/W3096324175","https://openalex.org/W3097073596","https://openalex.org/W3153893842","https://openalex.org/W3163464523","https://openalex.org/W4245919820","https://openalex.org/W6631362777","https://openalex.org/W6688816777","https://openalex.org/W6784901908"],"related_works":["https://openalex.org/W4234190324","https://openalex.org/W2126085626","https://openalex.org/W2545131480","https://openalex.org/W1843883989","https://openalex.org/W208363076","https://openalex.org/W1973693762","https://openalex.org/W2087341511","https://openalex.org/W4230495983","https://openalex.org/W2162533796","https://openalex.org/W2378903289"],"abstract_inverted_index":{"Speech":[0],"enhancement":[1],"aims":[2],"to":[3,23,95,100],"improve":[4],"the":[5,9,15,32,62,65,70,75,101,122],"perceptual":[6,72],"quality":[7],"of":[8,14,34,64],"speech":[10,24,77],"signal":[11,78],"by":[12],"suppression":[13,20],"background":[16],"noise.":[17],"However,":[18],"excessive":[19],"may":[21],"lead":[22],"distortion":[25],"and":[26,69,79,91,111],"speaker":[27,35,53,66,85,123],"information":[28,125],"loss,":[29],"which":[30,114],"degrades":[31],"performance":[33,107],"embedding":[36],"extraction.":[37,55],"To":[38],"alleviate":[39],"this":[40],"problem,":[41],"we":[42],"propose":[43],"an":[44],"end-to-end":[45],"deep":[46],"learning":[47],"framework,":[48],"dubbed":[49],"PL-EESR,":[50],"for":[51],"robust":[52],"representation":[54],"This":[56],"framework":[57],"is":[58],"optimized":[59],"based":[60],"on":[61],"feedback":[63],"identification":[67],"task":[68],"high-level":[71],"deviation":[73],"between":[74],"raw":[76],"its":[80],"noisy":[81,90,112],"version.":[82],"We":[83],"conducted":[84],"verification":[86],"tasks":[87],"in":[88,108],"both":[89,109],"clean":[92,110],"environment":[93],"respectively":[94],"evaluate":[96],"our":[97,103,116],"system.":[98],"Compared":[99],"baseline,":[102],"method":[104,117],"shows":[105],"better":[106],"environments,":[113],"means":[115],"can":[118],"not":[119],"only":[120],"enhance":[121],"relative":[124],"but":[126],"also":[127],"avoid":[128],"adding":[129],"distortions.":[130]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
