{"id":"https://openalex.org/W4223506691","doi":"https://doi.org/10.1109/iceic54506.2022.9748515","title":"Effective Data Augmentation Methods for Neural Text-to-Speech Systems","display_name":"Effective Data Augmentation Methods for Neural Text-to-Speech Systems","publication_year":2022,"publication_date":"2022-02-06","ids":{"openalex":"https://openalex.org/W4223506691","doi":"https://doi.org/10.1109/iceic54506.2022.9748515"},"language":"en","primary_location":{"id":"doi:10.1109/iceic54506.2022.9748515","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iceic54506.2022.9748515","pdf_url":null,"source":{"id":"https://openalex.org/S4363608213","display_name":"2022 International Conference on Electronics, Information, and Communication (ICEIC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Electronics, Information, and Communication (ICEIC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035007313","display_name":"Suhyeon Oh","orcid":null},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Suhyeon Oh","raw_affiliation_strings":["NAVER Corp.,Seongnam,Korea","NAVER Corp., Seongnam, Korea"],"affiliations":[{"raw_affiliation_string":"NAVER Corp.,Seongnam,Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101576669","display_name":"Ohsung Kwon","orcid":"https://orcid.org/0000-0003-3880-0911"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Ohsung Kwon","raw_affiliation_strings":["NAVER Corp.,Seongnam,Korea","NAVER Corp., Seongnam, Korea"],"affiliations":[{"raw_affiliation_string":"NAVER Corp.,Seongnam,Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073244533","display_name":"Min-Jae Hwang","orcid":null},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Min-Jae Hwang","raw_affiliation_strings":["NAVER Corp.,Seongnam,Korea","NAVER Corp., Seongnam, Korea"],"affiliations":[{"raw_affiliation_string":"NAVER Corp.,Seongnam,Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101480212","display_name":"Jae-Min Kim","orcid":"https://orcid.org/0000-0001-9309-9369"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jae-Min Kim","raw_affiliation_strings":["NAVER Corp.,Seongnam,Korea","NAVER Corp., Seongnam, Korea"],"affiliations":[{"raw_affiliation_string":"NAVER Corp.,Seongnam,Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5104035145","display_name":"Eunwoo Song","orcid":"https://orcid.org/0000-0003-0642-7083"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Eunwoo Song","raw_affiliation_strings":["NAVER Corp.,Seongnam,Korea","NAVER Corp., Seongnam, Korea"],"affiliations":[{"raw_affiliation_string":"NAVER Corp.,Seongnam,Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea","institution_ids":["https://openalex.org/I60922564"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5035007313"],"corresponding_institution_ids":["https://openalex.org/I60922564"],"apc_list":null,"apc_paid":null,"fwci":0.1039,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.23233139,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"2013","issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7992929220199585},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.7261233925819397},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6503521203994751},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.5661176443099976},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5653486251831055},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5497584342956543},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5106862187385559},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4900039732456207},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.41222280263900757},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4109395742416382},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3862413167953491},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34926652908325195},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07022476196289062}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7992929220199585},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.7261233925819397},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6503521203994751},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.5661176443099976},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5653486251831055},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5497584342956543},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5106862187385559},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4900039732456207},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.41222280263900757},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4109395742416382},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3862413167953491},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34926652908325195},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07022476196289062},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iceic54506.2022.9748515","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iceic54506.2022.9748515","pdf_url":null,"source":{"id":"https://openalex.org/S4363608213","display_name":"2022 International Conference on Electronics, Information, and Communication (ICEIC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Electronics, Information, and Communication (ICEIC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.699999988079071,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1533861849","https://openalex.org/W1994396704","https://openalex.org/W2102003408","https://openalex.org/W2294130536","https://openalex.org/W2519091744","https://openalex.org/W2535388113","https://openalex.org/W2751205669","https://openalex.org/W2902681596","https://openalex.org/W2903739847","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2968917279","https://openalex.org/W2972597685","https://openalex.org/W2984862052","https://openalex.org/W3007859642","https://openalex.org/W3008691130","https://openalex.org/W3015338123","https://openalex.org/W3080626558","https://openalex.org/W3097264669","https://openalex.org/W3141224548","https://openalex.org/W3160326269","https://openalex.org/W3161890269","https://openalex.org/W3196969505","https://openalex.org/W6631943919","https://openalex.org/W6675380101","https://openalex.org/W6756686980","https://openalex.org/W6767164110","https://openalex.org/W7073743773"],"related_works":["https://openalex.org/W4312659495","https://openalex.org/W4385366257","https://openalex.org/W3101007570","https://openalex.org/W4387910575","https://openalex.org/W3176425421","https://openalex.org/W3091312527","https://openalex.org/W1555087354","https://openalex.org/W4287865546","https://openalex.org/W3007306981","https://openalex.org/W2949555133"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"an":[3],"effective":[4],"self-augmentation":[5],"method":[6],"for":[7,46,87],"improving":[8,47],"the":[9,38,97,112,119,133,136,151,159,168,175],"quality":[10,19],"of":[11,40,71,135],"neural":[12,26],"text-to-speech":[13],"(TTS)":[14],"systems.":[15],"As":[16,122],"synthetic":[17,30,41,63,98,131,176],"speech":[18,114],"has":[20,50],"been":[21,52],"greatly":[22],"improved,":[23],"creating":[24],"a":[25,77],"TTS":[27,137,153],"system":[28,171],"using":[29],"corpora":[31,101],"is":[32,43,59,84,107,115,143],"now":[33],"possible.":[34],"However,":[35],"whether":[36],"increasing":[37],"amount":[39],"data":[42,64,124,142,165,177],"always":[44],"beneficial":[45],"training":[48,123],"efficiency":[49],"not":[51],"verified.":[53],"Our":[54],"aim":[55],"in":[56],"this":[57],"study":[58],"to":[60,69,109],"selectively":[61,127],"choose":[62],"whose":[65],"characteristics":[66],"are":[67],"close":[68],"those":[70,141],"natural":[72],"speech.":[73],"Specifically,":[74],"we":[75],"adopt":[76],"ranking":[78,89],"support":[79],"vector":[80],"machine":[81],"(RankSVM)":[82],"that":[83,150],"well":[85],"known":[86],"effectively":[88],"relative":[90],"attributes":[91],"among":[92],"binary":[93],"classes.":[94],"By":[95],"setting":[96],"and":[99,167],"recorded":[100,120,164],"as":[102],"two":[103],"opposite":[104],"classes,":[105],"RankSVM":[106],"used":[108],"determine":[110],"how":[111],"synthesized":[113],"acoustically":[116],"similar":[117],"with":[118,163,173],"data.":[121],"can":[125],"be":[126],"chosen":[128],"from":[129],"large-scale":[130],"corpora,":[132],"performance":[134],"model":[138,154,161],"re-trained":[139,172],"by":[140],"significantly":[144],"improved.":[145],"Subjective":[146],"evaluation":[147],"results":[148],"verify":[149],"proposed":[152],"performs":[155],"much":[156],"better":[157],"than":[158],"original":[160],"trained":[162],"alone":[166],"similarly":[169],"configured":[170],"all":[174],"without":[178],"any":[179],"selection":[180],"method.":[181]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
