{"id":"https://openalex.org/W3015826515","doi":"https://doi.org/10.1109/icassp40776.2020.9054535","title":"Zero-Shot Multi-Speaker Text-To-Speech with State-Of-The-Art Neural Speaker Embeddings","display_name":"Zero-Shot Multi-Speaker Text-To-Speech with State-Of-The-Art Neural Speaker Embeddings","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015826515","doi":"https://doi.org/10.1109/icassp40776.2020.9054535","mag":"3015826515"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054535","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054535","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082349516","display_name":"Erica Cooper","orcid":"https://orcid.org/0000-0002-2978-2793"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Erica Cooper","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010825170","display_name":"Cheng-I Lai","orcid":"https://orcid.org/0000-0002-2343-8596"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cheng-I Lai","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, USA"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068072475","display_name":"Yusuke Yasuda","orcid":"https://orcid.org/0000-0002-2130-747X"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yusuke Yasuda","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012466506","display_name":"Fuming Fang","orcid":"https://orcid.org/0000-0002-9332-3735"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Fuming Fang","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327839","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0001-8246-0606"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Xin Wang","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071702566","display_name":"Nanxin Chen","orcid":"https://orcid.org/0000-0001-6698-1604"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nanxin Chen","raw_affiliation_strings":["Johns Hopkins University, Baltimore, USA"],"affiliations":[{"raw_affiliation_string":"Johns Hopkins University, Baltimore, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5082349516"],"corresponding_institution_ids":["https://openalex.org/I184597095"],"apc_list":null,"apc_paid":null,"fwci":15.7689,"has_fulltext":false,"cited_by_count":159,"citation_normalized_percentile":{"value":0.99261707,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"6184","last_page":"6188"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.7743533253669739},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7707175016403198},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.7511510848999023},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.7152670621871948},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6794265508651733},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.6508171558380127},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.5528367757797241},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5357145071029663},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5296841859817505},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38598722219467163},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3508863151073456},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.12535223364830017},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.06706124544143677},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.060667186975479126}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.7743533253669739},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7707175016403198},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.7511510848999023},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.7152670621871948},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6794265508651733},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.6508171558380127},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.5528367757797241},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5357145071029663},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5296841859817505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38598722219467163},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3508863151073456},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.12535223364830017},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.06706124544143677},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.060667186975479126},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054535","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054535","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6000000238418579,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1529628403","https://openalex.org/W2023238506","https://openalex.org/W2041823554","https://openalex.org/W2064364374","https://openalex.org/W2150769028","https://openalex.org/W2519091744","https://openalex.org/W2527729766","https://openalex.org/W2726515241","https://openalex.org/W2788357188","https://openalex.org/W2808631503","https://openalex.org/W2808706139","https://openalex.org/W2884412522","https://openalex.org/W2886769154","https://openalex.org/W2888968865","https://openalex.org/W2890964092","https://openalex.org/W2892620417","https://openalex.org/W2903853691","https://openalex.org/W2916104401","https://openalex.org/W2917245127","https://openalex.org/W2951758756","https://openalex.org/W2960427821","https://openalex.org/W2962739369","https://openalex.org/W2963035245","https://openalex.org/W2963192573","https://openalex.org/W2963371159","https://openalex.org/W2963403868","https://openalex.org/W2963432880","https://openalex.org/W2963609956","https://openalex.org/W2963912924","https://openalex.org/W2963945466","https://openalex.org/W2963975282","https://openalex.org/W2964243274","https://openalex.org/W2972440097","https://openalex.org/W2972574864","https://openalex.org/W2972610613","https://openalex.org/W2972633940","https://openalex.org/W2972772214","https://openalex.org/W2972944317","https://openalex.org/W2972961496","https://openalex.org/W2973084242","https://openalex.org/W2973157397","https://openalex.org/W3010925296","https://openalex.org/W4289750118","https://openalex.org/W4298174729","https://openalex.org/W4385245566","https://openalex.org/W6631852945","https://openalex.org/W6677973343","https://openalex.org/W6739901393","https://openalex.org/W6748573829","https://openalex.org/W6752888775","https://openalex.org/W6753575415","https://openalex.org/W6753855596","https://openalex.org/W6755135894","https://openalex.org/W6757322325","https://openalex.org/W6765653190","https://openalex.org/W6936113694"],"related_works":["https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W2206035908","https://openalex.org/W2108985546","https://openalex.org/W2433276473","https://openalex.org/W2077992636","https://openalex.org/W1537411440","https://openalex.org/W2535215250","https://openalex.org/W290673751","https://openalex.org/W2017702615"],"abstract_inverted_index":{"While":[0],"speaker":[1,8,13,48,51,59,74,81],"adaptation":[2,26,92],"for":[3,15,24,34,53,85,90],"end-to-end":[4,35,97],"speech":[5,98],"synthesis":[6,37],"using":[7],"embeddings":[9,49,60,78],"can":[10,65],"produce":[11],"good":[12],"similarity":[14,52,82],"speakers":[16,87,95],"seen":[17],"during":[18],"training,":[19],"there":[20],"remains":[21],"a":[22,73],"gap":[23],"zero-shot":[25,91],"to":[27,93],"unseen":[28,54,86],"speakers.":[29,55],"We":[30],"investigate":[31],"multi-speaker":[32],"modeling":[33],"text-to-speech":[36],"and":[38,83],"study":[39],"the":[40],"effects":[41],"of":[42,45],"different":[43],"types":[44],"state-of-the-art":[46],"neural":[47],"on":[50],"Learnable":[56],"dictionary":[57],"encoding-based":[58],"with":[61],"angular":[62],"softmax":[63],"loss":[64],"improve":[66,80],"equal":[67],"error":[68],"rates":[69],"over":[70],"x-vectors":[71],"in":[72,96],"verification":[75],"task;":[76],"these":[77],"also":[79],"naturalness":[84],"when":[88],"used":[89],"new":[94],"synthesis.":[99]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":26},{"year":2023,"cited_by_count":31},{"year":2022,"cited_by_count":39},{"year":2021,"cited_by_count":41},{"year":2020,"cited_by_count":5}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
