{"id":"https://openalex.org/W3194622036","doi":"https://doi.org/10.21437/ssw.2021-24","title":"Preliminary study on using vector quantization latent spaces for TTS/VC systems with consistent performance","display_name":"Preliminary study on using vector quantization latent spaces for TTS/VC systems with consistent performance","publication_year":2021,"publication_date":"2021-08-24","ids":{"openalex":"https://openalex.org/W3194622036","doi":"https://doi.org/10.21437/ssw.2021-24","mag":"3194622036"},"language":"en","primary_location":{"id":"doi:10.21437/ssw.2021-24","is_oa":false,"landing_page_url":"https://doi.org/10.21437/ssw.2021-24","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"11th ISCA Speech Synthesis Workshop (SSW 11)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002145453","display_name":"Hieu-Thi Luong","orcid":"https://orcid.org/0000-0002-4772-5995"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Hieu-Thi Luong","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5002145453"],"corresponding_institution_ids":["https://openalex.org/I184597095"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09935551,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"136","last_page":"141"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.7036004066467285},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6371523141860962},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.546349823474884},{"id":"https://openalex.org/keywords/learning-vector-quantization","display_name":"Learning vector quantization","score":0.47397512197494507},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31310999393463135},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24291399121284485}],"concepts":[{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.7036004066467285},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6371523141860962},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.546349823474884},{"id":"https://openalex.org/C40567965","wikidata":"https://www.wikidata.org/wiki/Q1820283","display_name":"Learning vector quantization","level":3,"score":0.47397512197494507},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31310999393463135},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24291399121284485}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/ssw.2021-24","is_oa":false,"landing_page_url":"https://doi.org/10.21437/ssw.2021-24","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"11th ISCA Speech Synthesis Workshop (SSW 11)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1526460031","display_name":null,"funder_award_id":"18H04112","funder_id":"https://openalex.org/F4320320912","funder_display_name":"Ministry of Education, Culture, Sports, Science and Technology"},{"id":"https://openalex.org/G2292762562","display_name":null,"funder_award_id":"21H04906","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G2792802287","display_name":null,"funder_award_id":"KAKENHI","funder_id":"https://openalex.org/F4320338075","funder_display_name":"Core Research for Evolutional Science and Technology"},{"id":"https://openalex.org/G6299358200","display_name":"PRISM: Speech privacy preservation based on selecting masking","funder_award_id":"18H04112","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G6718509927","display_name":null,"funder_award_id":"CREST","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7879866177","display_name":null,"funder_award_id":"CREST","funder_id":"https://openalex.org/F4320320912","funder_display_name":"Ministry of Education, Culture, Sports, Science and Technology"}],"funders":[{"id":"https://openalex.org/F4320320912","display_name":"Ministry of Education, Culture, Sports, Science and Technology","ror":"https://ror.org/048rj2z13"},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320338075","display_name":"Core Research for Evolutional Science and Technology","ror":"https://ror.org/00097mb19"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W37526647","https://openalex.org/W2108501770","https://openalex.org/W2532494225","https://openalex.org/W2547875792","https://openalex.org/W2759925408","https://openalex.org/W2794490148","https://openalex.org/W2901997113","https://openalex.org/W2910577860","https://openalex.org/W2938947737","https://openalex.org/W2945478979","https://openalex.org/W2946555236","https://openalex.org/W2962896155","https://openalex.org/W2963799213","https://openalex.org/W2963828549","https://openalex.org/W2963912679","https://openalex.org/W2972374322","https://openalex.org/W2972595148","https://openalex.org/W2972848589","https://openalex.org/W2972849140","https://openalex.org/W3015434413","https://openalex.org/W3091928890","https://openalex.org/W3092496982","https://openalex.org/W3095990227","https://openalex.org/W3118753411","https://openalex.org/W3144044466","https://openalex.org/W3154451338","https://openalex.org/W3160584619","https://openalex.org/W3163906773","https://openalex.org/W3199367817","https://openalex.org/W4287632938","https://openalex.org/W4288107125"],"related_works":["https://openalex.org/W2100968651","https://openalex.org/W4243803532","https://openalex.org/W2142248489","https://openalex.org/W2352648934","https://openalex.org/W4230688072","https://openalex.org/W1915693853","https://openalex.org/W1530525041","https://openalex.org/W2378212145","https://openalex.org/W2798892016","https://openalex.org/W2202992072"],"abstract_inverted_index":{"Generally":[0],"speaking,":[1],"the":[2,20,24,32,40,54,61,69,76,80,111,140,151],"main":[3],"objective":[4],"when":[5],"training":[6],"a":[7,87,98,121,131],"neural":[8,25],"speech":[9,18],"synthesis":[10],"system":[11,41,114],"is":[12,136,144,155],"to":[13,31,59,85],"synthesize":[14],"natural":[15],"and":[16,65,105,160],"expressive":[17],"from":[19],"output":[21],"layer":[22],"of":[23,56,103,126,163],"network":[26],"without":[27],"much":[28],"attention":[29],"given":[30],"hidden":[33],"layers.However,":[34],"by":[35],"learning":[36],"useful":[37,137],"latent":[38,62,77,88,133],"representation,":[39],"can":[42],"be":[43],"used":[44],"for":[45,138,146,157],"many":[46],"more":[47],"practical":[48],"scenarios.In":[49],"this":[50],"paper,":[51],"we":[52,82],"investigate":[53],"use":[55],"quantized":[57],"vectors":[58],"model":[60],"linguistic":[63,89],"embedding":[64,90],"compare":[66],"it":[67],"with":[68,116],"continuous":[70],"counterpart.By":[71],"enforcing":[72],"different":[73,94],"policies":[74],"over":[75],"spaces":[78],"in":[79,101,124],"training,":[81],"are":[83],"able":[84],"obtain":[86],"that":[91,110,135,164],"takes":[92],"on":[93],"properties":[95],"while":[96],"having":[97],"similar":[99],"performance":[100],"terms":[102,125],"quality":[104],"speaker":[106,158],"similarity.Our":[107],"experiments":[108],"show":[109],"voice":[112],"cloning":[113],"built":[115],"vector":[117],"quantization":[118],"has":[119,130],"only":[120],"small":[122],"degradation":[123],"perceptive":[127],"evaluations,":[128],"but":[129],"discrete":[132],"space":[134],"reducing":[139],"representation":[141],"bit-rate,":[142],"which":[143,154],"desirable":[145],"data":[147],"transferring,":[148],"or":[149],"limiting":[150],"information":[152],"leaking,":[153],"important":[156],"anonymization":[159],"other":[161],"tasks":[162],"nature.":[165]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
