{"id":"https://openalex.org/W2954882393","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023186","title":"End-to-End Emotional Speech Synthesis Using Style Tokens and Semi-Supervised Training","display_name":"End-to-End Emotional Speech Synthesis Using Style Tokens and Semi-Supervised Training","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W2954882393","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023186","mag":"2954882393"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc47483.2019.9023186","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023186","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1906.10859","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030109120","display_name":"Pengfei Wu","orcid":"https://orcid.org/0000-0003-0896-9476"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pengfei Wu","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China","University of, Science and Technology of China, Hefei, China#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"University of, Science and Technology of China, Hefei, China#TAB#","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Ling","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China","University of, Science and Technology of China, Hefei, China#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"University of, Science and Technology of China, Hefei, China#TAB#","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100343789","display_name":"Lijuan Liu","orcid":"https://orcid.org/0009-0007-0163-7336"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lijuan Liu","raw_affiliation_strings":["iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R.China","iFLYTEK Research, iFLYTEK Co., Ltd.,Hefei,P.R.China"],"affiliations":[{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R.China","institution_ids":[]},{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd.,Hefei,P.R.China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064657549","display_name":"Yuan Jiang","orcid":"https://orcid.org/0000-0003-4307-0562"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan Jiang","raw_affiliation_strings":["iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R.China","iFLYTEK Research, iFLYTEK Co., Ltd.,Hefei,P.R.China"],"affiliations":[{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R.China","institution_ids":[]},{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd.,Hefei,P.R.China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042279730","display_name":"Hong-Chuan Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hongchuan Wu","raw_affiliation_strings":["iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R.China","iFLYTEK Research, iFLYTEK Co., Ltd.,Hefei,P.R.China"],"affiliations":[{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R.China","institution_ids":[]},{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd.,Hefei,P.R.China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057227915","display_name":"Li-Rong Dai","orcid":"https://orcid.org/0000-0002-0859-2827"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lirong Dai","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China","University of, Science and Technology of China, Hefei, China#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"University of, Science and Technology of China, Hefei, China#TAB#","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5030109120"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.7233,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.78418744,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"623","last_page":"627"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.7922592163085938},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6632617712020874},{"id":"https://openalex.org/keywords/style","display_name":"Style (visual arts)","score":0.6053102016448975},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.581872820854187},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.5621152520179749},{"id":"https://openalex.org/keywords/cross-entropy","display_name":"Cross entropy","score":0.5235440731048584},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5195801258087158},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47364985942840576},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4249756336212158},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4222128689289093},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.41543814539909363},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.23690718412399292}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.7922592163085938},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6632617712020874},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.6053102016448975},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.581872820854187},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.5621152520179749},{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.5235440731048584},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5195801258087158},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47364985942840576},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4249756336212158},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4222128689289093},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.41543814539909363},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.23690718412399292},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/apsipaasc47483.2019.9023186","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023186","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1906.10859","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1906.10859","pdf_url":"https://arxiv.org/pdf/1906.10859","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2954882393","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1906.10859.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1906.10859","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1906.10859","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1906.10859","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1906.10859","pdf_url":"https://arxiv.org/pdf/1906.10859","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2954882393.pdf","grobid_xml":"https://content.openalex.org/works/W2954882393.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W152645600","https://openalex.org/W1546776967","https://openalex.org/W2019540503","https://openalex.org/W2039800941","https://openalex.org/W2085013480","https://openalex.org/W2102003408","https://openalex.org/W2107740512","https://openalex.org/W2150791533","https://openalex.org/W2294797155","https://openalex.org/W2519091744","https://openalex.org/W2770743791","https://openalex.org/W2787378487","https://openalex.org/W2793479148","https://openalex.org/W2795109282","https://openalex.org/W2805307277","https://openalex.org/W2962691331","https://openalex.org/W2963272440","https://openalex.org/W2963609956","https://openalex.org/W2963927338","https://openalex.org/W2963975282","https://openalex.org/W2964243274","https://openalex.org/W6606247353","https://openalex.org/W6632972894","https://openalex.org/W6675938391","https://openalex.org/W6696843773","https://openalex.org/W6746238782","https://openalex.org/W6750489868","https://openalex.org/W6751998403","https://openalex.org/W6753855596"],"related_works":["https://openalex.org/W3010916717","https://openalex.org/W3029690717","https://openalex.org/W2164314347","https://openalex.org/W3100294849","https://openalex.org/W2966387353","https://openalex.org/W2919787052","https://openalex.org/W3018158305","https://openalex.org/W2926420696","https://openalex.org/W3206725777","https://openalex.org/W2968201928","https://openalex.org/W2936173226","https://openalex.org/W3169893188","https://openalex.org/W2095234413","https://openalex.org/W1504365409","https://openalex.org/W3213710575","https://openalex.org/W3049147448","https://openalex.org/W2808336242","https://openalex.org/W3165405064","https://openalex.org/W3137257141","https://openalex.org/W2952622013"],"abstract_inverted_index":{"This":[0,19],"paper":[1],"proposes":[2],"an":[3],"end-to-end":[4],"emotional":[5],"speech":[6],"synthesis":[7],"(ESS)":[8],"method":[9,73],"which":[10],"adopts":[11],"global":[12],"style":[13,29,55,79],"tokens":[14,30,56,80],"(GSTs)":[15],"for":[16,99],"semi-supervised":[17],"training.":[18],"model":[20,93,98,118],"is":[21,48,113],"built":[22],"based":[23],"on":[24],"the":[25,52,58,95,116],"GST-Tacotron":[26],"framework.":[27],"The":[28],"are":[31],"defined":[32],"to":[33,50,115],"present":[34],"emotion":[35,46,65,82,108,122],"categories.":[36],"A":[37],"cross":[38],"entropy":[39],"loss":[40],"function":[41],"between":[42,78],"token":[43],"weights":[44],"and":[45,81,86],"labels":[47],"designed":[49],"obtain":[51],"interpretability":[53],"of":[54,61,104],"utilizing":[57],"small":[59],"portion":[60],"training":[62,105],"data":[63,106],"with":[64],"labels.":[66,109,123],"Emotion":[67],"recognition":[68],"experiments":[69],"confirm":[70],"that":[71,91],"this":[72],"can":[74],"achieve":[75],"one-to-one":[76],"correspondence":[77],"categories":[83],"effectively.":[84],"Objective":[85],"subjective":[87,111],"evaluation":[88],"results":[89],"show":[90],"our":[92],"outperforms":[94],"conventional":[96],"Tacotron":[97,117],"ESS":[100],"when":[101],"only":[102],"5%":[103],"has":[107],"Its":[110],"performance":[112],"close":[114],"trained":[119],"using":[120],"all":[121]},"counts_by_year":[{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
