{"id":"https://openalex.org/W4200047557","doi":"https://doi.org/10.1109/ictc52510.2021.9621175","title":"A Preliminary Study on Wav2Vec 2.0 Embeddings for Text-to-Speech","display_name":"A Preliminary Study on Wav2Vec 2.0 Embeddings for Text-to-Speech","publication_year":2021,"publication_date":"2021-10-20","ids":{"openalex":"https://openalex.org/W4200047557","doi":"https://doi.org/10.1109/ictc52510.2021.9621175"},"language":"en","primary_location":{"id":"doi:10.1109/ictc52510.2021.9621175","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictc52510.2021.9621175","pdf_url":null,"source":{"id":"https://openalex.org/S4363607766","display_name":"2021 International Conference on Information and Communication Technology Convergence (ICTC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Conference on Information and Communication Technology Convergence (ICTC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060628797","display_name":"Yohan Lim","orcid":"https://orcid.org/0009-0005-0007-4055"},"institutions":[{"id":"https://openalex.org/I118692353","display_name":"University of Science and Technology","ror":"https://ror.org/05bj7sh33","country_code":"YE","type":"education","lineage":["https://openalex.org/I118692353"]},{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR","YE"],"is_corresponding":false,"raw_author_name":"Yohan Lim","raw_affiliation_strings":["Electronics and Telecommunications Research Institute, Dae-Jeon, Republic of Korea","University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunications Research Institute, Dae-Jeon, Republic of Korea","institution_ids":["https://openalex.org/I142401562"]},{"raw_affiliation_string":"University of Science and Technology","institution_ids":["https://openalex.org/I118692353"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074697874","display_name":"Nam-Hyeong Kim","orcid":"https://orcid.org/0000-0003-2731-4600"},"institutions":[{"id":"https://openalex.org/I118692353","display_name":"University of Science and Technology","ror":"https://ror.org/05bj7sh33","country_code":"YE","type":"education","lineage":["https://openalex.org/I118692353"]},{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR","YE"],"is_corresponding":false,"raw_author_name":"Namhyeong Kim","raw_affiliation_strings":["Electronics and Telecommunications Research Institute, Dae-Jeon, Republic of Korea","University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunications Research Institute, Dae-Jeon, Republic of Korea","institution_ids":["https://openalex.org/I142401562"]},{"raw_affiliation_string":"University of Science and Technology","institution_ids":["https://openalex.org/I118692353"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058594241","display_name":"Seung Yun","orcid":"https://orcid.org/0000-0002-4610-1777"},"institutions":[{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Seung Yun","raw_affiliation_strings":["Electronics and Telecommunications Research Institute, Dae-Jeon, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunications Research Institute, Dae-Jeon, Republic of Korea","institution_ids":["https://openalex.org/I142401562"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100705921","display_name":"Sanghun Kim","orcid":"https://orcid.org/0000-0002-1423-6116"},"institutions":[{"id":"https://openalex.org/I118692353","display_name":"University of Science and Technology","ror":"https://ror.org/05bj7sh33","country_code":"YE","type":"education","lineage":["https://openalex.org/I118692353"]},{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR","YE"],"is_corresponding":false,"raw_author_name":"Sanghun Kim","raw_affiliation_strings":["Electronics and Telecommunications Research Institute, Dae-Jeon, Republic of Korea","University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunications Research Institute, Dae-Jeon, Republic of Korea","institution_ids":["https://openalex.org/I142401562"]},{"raw_affiliation_string":"University of Science and Technology","institution_ids":["https://openalex.org/I118692353"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011760560","display_name":"Seung\u2010Ik Lee","orcid":"https://orcid.org/0000-0003-2986-7540"},"institutions":[{"id":"https://openalex.org/I118692353","display_name":"University of Science and Technology","ror":"https://ror.org/05bj7sh33","country_code":"YE","type":"education","lineage":["https://openalex.org/I118692353"]},{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR","YE"],"is_corresponding":false,"raw_author_name":"Seung-Ik Lee","raw_affiliation_strings":["Electronics and Telecommunications Research Institute, Dae-Jeon, Republic of Korea","University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunications Research Institute, Dae-Jeon, Republic of Korea","institution_ids":["https://openalex.org/I142401562"]},{"raw_affiliation_string":"University of Science and Technology","institution_ids":["https://openalex.org/I118692353"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5073,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.66608133,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"343","last_page":"347"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7456029653549194},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7221733331680298},{"id":"https://openalex.org/keywords/decodes","display_name":"Decodes","score":0.6891381144523621},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.6499605774879456},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6034739017486572},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5974285006523132},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.5672093629837036},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5385061502456665},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5234869718551636},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4725850820541382},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46864053606987},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4608592092990875},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.41323965787887573},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.2391815185546875},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09195861220359802},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.08247342705726624},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.058396339416503906},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.056744009256362915}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7456029653549194},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7221733331680298},{"id":"https://openalex.org/C2778858076","wikidata":"https://www.wikidata.org/wiki/Q5249539","display_name":"Decodes","level":3,"score":0.6891381144523621},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.6499605774879456},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6034739017486572},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5974285006523132},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.5672093629837036},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5385061502456665},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5234869718551636},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4725850820541382},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46864053606987},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4608592092990875},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41323965787887573},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2391815185546875},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09195861220359802},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.08247342705726624},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.058396339416503906},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.056744009256362915},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ictc52510.2021.9621175","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictc52510.2021.9621175","pdf_url":null,"source":{"id":"https://openalex.org/S4363607766","display_name":"2021 International Conference on Information and Communication Technology Convergence (ICTC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Conference on Information and Communication Technology Convergence (ICTC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G4484368752","display_name":null,"funder_award_id":"21ZS1100","funder_id":"https://openalex.org/F4320322093","funder_display_name":"Electronics and Telecommunications Research Institute"}],"funders":[{"id":"https://openalex.org/F4320322093","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2120847449","https://openalex.org/W2129142580","https://openalex.org/W2154920538","https://openalex.org/W2519091744","https://openalex.org/W2584505851","https://openalex.org/W2752796333","https://openalex.org/W2777302760","https://openalex.org/W2795935804","https://openalex.org/W2896457183","https://openalex.org/W2903739847","https://openalex.org/W2940544976","https://openalex.org/W2946200149","https://openalex.org/W2962780374","https://openalex.org/W2963799213","https://openalex.org/W2964243274","https://openalex.org/W2973026522","https://openalex.org/W2981728663","https://openalex.org/W3015338123","https://openalex.org/W3024605872","https://openalex.org/W3033411150","https://openalex.org/W3036601975","https://openalex.org/W3112092703","https://openalex.org/W3112616666","https://openalex.org/W3161627112","https://openalex.org/W3173767661","https://openalex.org/W3198275944","https://openalex.org/W4287173589","https://openalex.org/W6682918086"],"related_works":["https://openalex.org/W2147073653","https://openalex.org/W1974895211","https://openalex.org/W1982067419","https://openalex.org/W2176409448","https://openalex.org/W2129841057","https://openalex.org/W3040712279","https://openalex.org/W4240738626","https://openalex.org/W2364769705","https://openalex.org/W2050958351","https://openalex.org/W2039489009"],"abstract_inverted_index":{"Wav2Vec":[0],"2.0":[1],"(W2V),":[2],"a":[3,66,78],"self-supervised":[4],"speech":[5,11],"representation":[6],"trained":[7],"with":[8],"massive":[9],"unlabeled":[10],"data,":[12],"showed":[13],"promising":[14],"results":[15],"on":[16],"Automatic":[17],"Speech":[18],"Recognition":[19],"(ASR).":[20],"In":[21,43],"spite":[22],"of":[23,60],"several":[24],"evidences":[25],"showing":[26],"that":[27,90],"W2V":[28,48,71,79,91],"can":[29],"generate":[30],"unique":[31],"acoustic":[32,97],"features,":[33],"it":[34],"has":[35],"been":[36],"rarely":[37],"utilized":[38],"in":[39],"Text-to-Speech":[40],"(TTS)":[41],"task.":[42],"this":[44],"paper,":[45],"we":[46,88],"adapt":[47],"embed":[49],"dings":[50],"to":[51,82],"TTS":[52,57],"as":[53,96],"feature":[54],"vectors.":[55],"Our":[56],"model":[58],"consists":[59],"two":[61],"components:":[62],"Text2Vec,":[63],"which":[64,76],"converts":[65],"character-level":[67],"text":[68],"sequence":[69,81],"into":[70],"embeddings,":[72],"and":[73],"GAN-based":[74],"vocoder,":[75],"decodes":[77],"embedding":[80],"waveform":[83],"signals.":[84],"From":[85],"the":[86],"experiments,":[87],"observe":[89],"embeddings":[92],"have":[93],"considerable":[94],"potential":[95],"features":[98],"for":[99],"TTS.":[100]},"counts_by_year":[{"year":2024,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
