{"id":"https://openalex.org/W4283771593","doi":"https://doi.org/10.21437/interspeech.2022-11133","title":"Language Model-Based Emotion Prediction Methods for Emotional Speech Synthesis Systems","display_name":"Language Model-Based Emotion Prediction Methods for Emotional Speech Synthesis Systems","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4283771593","doi":"https://doi.org/10.21437/interspeech.2022-11133"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-11133","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11133","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032657615","display_name":"Hyun-Wook Yoon","orcid":"https://orcid.org/0000-0002-2849-4140"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Hyun-Wook Yoon","raw_affiliation_strings":["NAVER Corp., Seongnam, Korea,"],"affiliations":[{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea,","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101576669","display_name":"Ohsung Kwon","orcid":"https://orcid.org/0000-0003-3880-0911"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Ohsung Kwon","raw_affiliation_strings":["NAVER Corp., Seongnam, Korea,"],"affiliations":[{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea,","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040581340","display_name":"Hoyeon Lee","orcid":"https://orcid.org/0000-0002-1165-1509"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hoyeon Lee","raw_affiliation_strings":["NAVER Corp., Seongnam, Korea,"],"affiliations":[{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea,","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100669485","display_name":"Ryuichi Yamamoto","orcid":"https://orcid.org/0000-0003-0299-5470"},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryuichi Yamamoto","raw_affiliation_strings":["LINE Corp., Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"LINE Corp., Tokyo, Japan","institution_ids":["https://openalex.org/I4210096607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104035145","display_name":"Eunwoo Song","orcid":"https://orcid.org/0000-0003-0642-7083"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Eunwoo Song","raw_affiliation_strings":["NAVER Corp., Seongnam, Korea,"],"affiliations":[{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea,","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012117775","display_name":"Jae\u2010Min Kim","orcid":"https://orcid.org/0000-0001-7409-6306"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jae-Min Kim","raw_affiliation_strings":["NAVER Corp., Seongnam, Korea,"],"affiliations":[{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea,","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055817509","display_name":"Min-Jae Hwang","orcid":"https://orcid.org/0000-0002-7376-009X"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Min-Jae Hwang","raw_affiliation_strings":["NAVER Corp., Seongnam, Korea,"],"affiliations":[{"raw_affiliation_string":"NAVER Corp., Seongnam, Korea,","institution_ids":["https://openalex.org/I60922564"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5032657615"],"corresponding_institution_ids":["https://openalex.org/I60922564"],"apc_list":null,"apc_paid":null,"fwci":1.1434,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.79664943,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"4596","last_page":"4600"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9869999885559082,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.778078556060791},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5747507214546204},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5103897452354431},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4457816481590271},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4112367033958435}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.778078556060791},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5747507214546204},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5103897452354431},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4457816481590271},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4112367033958435}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-11133","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11133","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5799999833106995,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1501669607","https://openalex.org/W1667249920","https://openalex.org/W1959608418","https://openalex.org/W2090777335","https://openalex.org/W2187089797","https://openalex.org/W2751205669","https://openalex.org/W2794490148","https://openalex.org/W2884607399","https://openalex.org/W2885800352","https://openalex.org/W2896457183","https://openalex.org/W2904459034","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2966387353","https://openalex.org/W3008691130","https://openalex.org/W3015338123","https://openalex.org/W3033411150","https://openalex.org/W3097264669","https://openalex.org/W3146550708","https://openalex.org/W3160326269","https://openalex.org/W3160329778","https://openalex.org/W3162791003","https://openalex.org/W3174285493","https://openalex.org/W3196969505","https://openalex.org/W4229506649","https://openalex.org/W4241676240","https://openalex.org/W4287117308","https://openalex.org/W4287375043","https://openalex.org/W4292779060","https://openalex.org/W4294721141","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"an":[3,50],"effective":[4],"emotional":[5,70,92,107,123],"text-to-speech":[6],"(TTS)":[7],"system":[8,31,89],"with":[9],"a":[10],"pre-trained":[11,43],"language":[12],"model":[13,81],"(LM)-based":[14],"emotion":[15,28,51],"prediction":[16],"method.Unlike":[17],"conventional":[18],"systems":[19],"that":[20],"require":[21],"auxiliary":[22,99],"inputs":[23],"such":[24],"as":[25,75],"manually":[26],"defined":[27],"classes,":[29],"our":[30],"directly":[32],"estimates":[33],"emotion-related":[34],"attributes":[35,65],"from":[36,95],"the":[37,69,79,87,102,110,113,119],"input":[38],"text.Specifically,":[39],"we":[40],"utilize":[41],"generative":[42],"transformer":[44],"(GPT)-3":[45],"to":[46,105],"jointly":[47],"predict":[48],"both":[49],"class":[52],"and":[53,60,73],"its":[54],"strength":[55],"in":[56,68],"representing":[57],"emotions'":[58],"coarse":[59],"fine":[61],"properties,":[62],"respectively.Then,":[63],"these":[64],"are":[66],"combined":[67],"embedding":[71],"space":[72],"used":[74],"conditional":[76],"features":[77],"of":[78,122],"TTS":[80],"for":[82],"generating":[83],"output":[84],"speech":[85,93],"signals.Consequently,":[86],"proposed":[88,114],"can":[90,116],"produce":[91],"only":[94],"text":[96],"without":[97],"any":[98],"inputs.Furthermore,":[100],"because":[101],"GPT-3":[103],"enables":[104],"capture":[106],"context":[108],"among":[109],"consecutive":[111],"sentences,":[112],"method":[115],"effectively":[117],"handle":[118],"paragraph-level":[120],"generation":[121],"speech.":[124]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
