{"id":"https://openalex.org/W4411600219","doi":"https://doi.org/10.1109/taffc.2025.3582715","title":"Hierarchical Control of Emotion Rendering in Speech Synthesis","display_name":"Hierarchical Control of Emotion Rendering in Speech Synthesis","publication_year":2025,"publication_date":"2025-06-24","ids":{"openalex":"https://openalex.org/W4411600219","doi":"https://doi.org/10.1109/taffc.2025.3582715"},"language":"en","primary_location":{"id":"doi:10.1109/taffc.2025.3582715","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taffc.2025.3582715","pdf_url":null,"source":{"id":"https://openalex.org/S104780363","display_name":"IEEE Transactions on Affective Computing","issn_l":"1949-3045","issn":["1949-3045","2371-9850"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Affective Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108413182","display_name":"Sho Inoue","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sho Inoue","raw_affiliation_strings":["School of Data Science, The Chinese University of Hong Kong, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-7498-8611","affiliations":[{"raw_affiliation_string":"School of Data Science, The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101654458","display_name":"Kun Zhou","orcid":"https://orcid.org/0000-0002-7869-4474"},"institutions":[{"id":"https://openalex.org/I4210086143","display_name":"Alibaba Group (Cayman Islands)","ror":"https://ror.org/00mnrxf72","country_code":"KY","type":"company","lineage":["https://openalex.org/I4210086143","https://openalex.org/I45928872"]},{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]},{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN","KY","US"],"is_corresponding":false,"raw_author_name":"Kun Zhou","raw_affiliation_strings":["Tongyi Speech Lab, Alibaba Group, Singapore","School of Data Science, The Chinese University of Hong Kong, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-7869-4474","affiliations":[{"raw_affiliation_string":"Tongyi Speech Lab, Alibaba Group, Singapore","institution_ids":["https://openalex.org/I4210095624","https://openalex.org/I4210086143","https://openalex.org/I45928872"]},{"raw_affiliation_string":"School of Data Science, The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Shuai Wang","orcid":"https://orcid.org/0000-0003-1523-9631"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]},{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuai Wang","raw_affiliation_strings":["School of Intelligence Science and Technology, Nanjing University, Suzhou, China","School of Data Science, The Chinese University of Hong Kong, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-1523-9631","affiliations":[{"raw_affiliation_string":"School of Intelligence Science and Technology, Nanjing University, Suzhou, China","institution_ids":["https://openalex.org/I308837","https://openalex.org/I881766915"]},{"raw_affiliation_string":"School of Data Science, The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["School of Data Science, The Chinese University of Hong Kong, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-9158-9401","affiliations":[{"raw_affiliation_string":"School of Data Science, The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8699,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.8610708,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"16","issue":"4","first_page":"3316","last_page":"3328"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.8009999990463257,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.8009999990463257,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.6888999938964844,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.6481999754905701,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.7404370903968811},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5095696449279785},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.45983996987342834},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.42878901958465576},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4274398982524872},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.36206451058387756},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.35391300916671753},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32795536518096924}],"concepts":[{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.7404370903968811},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5095696449279785},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.45983996987342834},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.42878901958465576},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4274398982524872},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.36206451058387756},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.35391300916671753},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32795536518096924}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taffc.2025.3582715","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taffc.2025.3582715","pdf_url":null,"source":{"id":"https://openalex.org/S104780363","display_name":"IEEE Transactions on Affective Computing","issn_l":"1949-3045","issn":["1949-3045","2371-9850"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Affective Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2799040894","display_name":null,"funder_award_id":"62271432","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W138045608","https://openalex.org/W388732865","https://openalex.org/W1581458799","https://openalex.org/W1965069145","https://openalex.org/W1980650959","https://openalex.org/W1981322637","https://openalex.org/W2023736093","https://openalex.org/W2032532974","https://openalex.org/W2074788634","https://openalex.org/W2102298245","https://openalex.org/W2107860279","https://openalex.org/W2108827278","https://openalex.org/W2150791533","https://openalex.org/W2294130536","https://openalex.org/W2609635007","https://openalex.org/W2785960144","https://openalex.org/W2803098682","https://openalex.org/W2885005742","https://openalex.org/W2894821115","https://openalex.org/W2936774411","https://openalex.org/W2962788625","https://openalex.org/W3008691130","https://openalex.org/W3010916717","https://openalex.org/W3015249983","https://openalex.org/W3025680351","https://openalex.org/W3095491807","https://openalex.org/W3096831136","https://openalex.org/W3096939667","https://openalex.org/W3112497262","https://openalex.org/W3135644023","https://openalex.org/W3142644187","https://openalex.org/W3152136404","https://openalex.org/W3160329778","https://openalex.org/W3162791003","https://openalex.org/W3163573274","https://openalex.org/W3197704090","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4205742757","https://openalex.org/W4210777104","https://openalex.org/W4221147462","https://openalex.org/W4226421465","https://openalex.org/W4283771593","https://openalex.org/W4297841795","https://openalex.org/W4313316128","https://openalex.org/W4319985616","https://openalex.org/W4323896824","https://openalex.org/W4361994820","https://openalex.org/W4366493008","https://openalex.org/W4372340947","https://openalex.org/W4382722605","https://openalex.org/W4385245566","https://openalex.org/W4385822378","https://openalex.org/W4390480998","https://openalex.org/W4392903647","https://openalex.org/W4392904615","https://openalex.org/W4392931276","https://openalex.org/W4393372125","https://openalex.org/W4402112392","https://openalex.org/W4402669711","https://openalex.org/W4404035888","https://openalex.org/W4406860647"],"related_works":["https://openalex.org/W2582295320","https://openalex.org/W2174759944","https://openalex.org/W2060809589","https://openalex.org/W3109786615","https://openalex.org/W4210771477","https://openalex.org/W2113492357","https://openalex.org/W2990948995","https://openalex.org/W2081348959","https://openalex.org/W3080983534","https://openalex.org/W2406416307"],"abstract_inverted_index":{"Emotional":[0],"text-to-speech":[1],"synthesis":[2],"(TTS)":[3],"aims":[4],"to":[5,40],"generate":[6],"realistic":[7],"emotional":[8,29,119,150],"speech":[9,70,120,132,148],"from":[10,101],"input":[11],"text.":[12],"However,":[13],"quantitatively":[14,125],"controlling":[15],"multi-level":[16],"emotion":[17,37,45,58,84,99,128,154],"rendering":[18,46,129],"remains":[19],"challenging.":[20],"In":[21],"this":[22],"paper,":[23],"we":[24,74],"propose":[25],"a":[26,33,56,64],"flow-matching":[27],"based":[28],"TTS":[30,88,114],"framework":[31,144],"with":[32,108],"novel":[34],"approach":[35],"for":[36],"intensity":[38,85,100],"modeling":[39],"facilitate":[41],"fine-grained":[42],"control":[43],"over":[44,130],"at":[47],"the":[48,90,96,102,127,131,140],"phoneme,":[49],"word,":[50],"and":[51,79,105,110,136,152],"utterance":[52],"levels.":[53,72],"We":[54],"introduce":[55],"hierarchical":[57,91,153],"distribution":[59],"(ED)":[60],"extractor":[61],"that":[62],"captures":[63,95],"quantifiable":[65],"ED":[66,92],"embedding":[67,93],"across":[68],"different":[69],"segment":[71],"Additionally,":[73],"explore":[75],"various":[76],"acoustic":[77],"features":[78],"assess":[80],"their":[81],"impact":[82],"on":[83],"modeling.":[86],"During":[87],"training,":[89],"effectively":[94],"variance":[97],"in":[98,145],"reference":[103],"audio":[104],"correlates":[106],"it":[107],"linguistic":[109],"speaker":[111],"information.":[112],"The":[113],"model":[115],"not":[116],"only":[117],"generates":[118],"during":[121],"inference,":[122],"but":[123],"also":[124],"controls":[126],"constituents.":[133],"Both":[134],"objective":[135],"subjective":[137],"evaluations":[138],"demonstrate":[139],"effectiveness":[141],"of":[142,147],"our":[143],"terms":[146],"quality,":[149],"expressiveness,":[151],"control.":[155]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
