{"id":"https://openalex.org/W4293523272","doi":"https://doi.org/10.1109/icme52920.2022.9859769","title":"Fusing Acoustic and Text Emotional Features for Expressive Speech Synthesis","display_name":"Fusing Acoustic and Text Emotional Features for Expressive Speech Synthesis","publication_year":2022,"publication_date":"2022-07-18","ids":{"openalex":"https://openalex.org/W4293523272","doi":"https://doi.org/10.1109/icme52920.2022.9859769"},"language":"en","primary_location":{"id":"doi:10.1109/icme52920.2022.9859769","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme52920.2022.9859769","pdf_url":null,"source":{"id":"https://openalex.org/S4363607799","display_name":"2022 IEEE International Conference on Multimedia and Expo (ICME)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100398504","display_name":"Ying Feng","orcid":"https://orcid.org/0000-0003-1045-4172"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]},{"id":"https://openalex.org/I4210149102","display_name":"Sanya University","ror":"https://ror.org/04fa2qd52","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210149102"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ying Feng","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology,Wuhan,China,430070","Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology,Wuhan,China,430070","institution_ids":["https://openalex.org/I196699116"]},{"raw_affiliation_string":"Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, China","institution_ids":["https://openalex.org/I4210149102","https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080295555","display_name":"Pengfei Duan","orcid":"https://orcid.org/0000-0002-0912-0928"},"institutions":[{"id":"https://openalex.org/I4210149102","display_name":"Sanya University","ror":"https://ror.org/04fa2qd52","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210149102"]},{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengfei Duan","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology,Wuhan,China,430070","Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology,Wuhan,China,430070","institution_ids":["https://openalex.org/I196699116"]},{"raw_affiliation_string":"Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, China","institution_ids":["https://openalex.org/I4210149102","https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086323231","display_name":"Yunfei Zi","orcid":"https://orcid.org/0000-0002-4778-7109"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunfei Zi","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology,Wuhan,China,430070"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology,Wuhan,China,430070","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003775716","display_name":"Yaxiong Chen","orcid":"https://orcid.org/0000-0002-2903-6723"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]},{"id":"https://openalex.org/I4210149102","display_name":"Sanya University","ror":"https://ror.org/04fa2qd52","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210149102"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaxiong Chen","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology,Wuhan,China,430070","Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, China","Chongqing Research Institute, Wuhan University of Technology, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology,Wuhan,China,430070","institution_ids":["https://openalex.org/I196699116"]},{"raw_affiliation_string":"Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, China","institution_ids":["https://openalex.org/I4210149102","https://openalex.org/I196699116"]},{"raw_affiliation_string":"Chongqing Research Institute, Wuhan University of Technology, Chongqing, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011707621","display_name":"Shengwu Xiong","orcid":"https://orcid.org/0000-0002-4006-7029"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]},{"id":"https://openalex.org/I4210149102","display_name":"Sanya University","ror":"https://ror.org/04fa2qd52","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210149102"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengwu Xiong","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology,Wuhan,China,430070","Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology,Wuhan,China,430070","institution_ids":["https://openalex.org/I196699116"]},{"raw_affiliation_string":"Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, China","institution_ids":["https://openalex.org/I4210149102","https://openalex.org/I196699116"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100398504"],"corresponding_institution_ids":["https://openalex.org/I196699116","https://openalex.org/I4210149102"],"apc_list":null,"apc_paid":null,"fwci":0.2079,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.40890972,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"01","last_page":"06"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.8374040126800537},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.7481545209884644},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7427362203598022},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.7226407527923584},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7132402658462524},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.7060380578041077},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5998722910881042},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.49348342418670654},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4898063838481903},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.4895506799221039},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4747248888015747},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4688231647014618},{"id":"https://openalex.org/keywords/mean-opinion-score","display_name":"Mean opinion score","score":0.44172829389572144},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.42326873540878296},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.327287495136261},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.23037436604499817},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.19816812872886658},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.062284916639328}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.8374040126800537},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.7481545209884644},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7427362203598022},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.7226407527923584},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7132402658462524},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.7060380578041077},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5998722910881042},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.49348342418670654},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4898063838481903},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.4895506799221039},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4747248888015747},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4688231647014618},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.44172829389572144},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.42326873540878296},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.327287495136261},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.23037436604499817},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.19816812872886658},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.062284916639328},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme52920.2022.9859769","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme52920.2022.9859769","pdf_url":null,"source":{"id":"https://openalex.org/S4363607799","display_name":"2022 IEEE International Conference on Multimedia and Expo (ICME)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.46000000834465027,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2866233420","display_name":null,"funder_award_id":"62176194,62101393","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2102003408","https://openalex.org/W2294797155","https://openalex.org/W2788357188","https://openalex.org/W2794490148","https://openalex.org/W2795109282","https://openalex.org/W2808706139","https://openalex.org/W2903739847","https://openalex.org/W2946200149","https://openalex.org/W2963609956","https://openalex.org/W2963691546","https://openalex.org/W2964138190","https://openalex.org/W2964243274","https://openalex.org/W2970006822","https://openalex.org/W2972677740","https://openalex.org/W3010916717","https://openalex.org/W3015826515","https://openalex.org/W3128910262","https://openalex.org/W4295731579","https://openalex.org/W6675380101","https://openalex.org/W6748588790","https://openalex.org/W6749489859","https://openalex.org/W6750489868","https://openalex.org/W6752888775","https://openalex.org/W6761365177","https://openalex.org/W6763832098","https://openalex.org/W6767111847","https://openalex.org/W6790220310"],"related_works":["https://openalex.org/W4392904630","https://openalex.org/W169399214","https://openalex.org/W4391272374","https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W40885451","https://openalex.org/W1927421023","https://openalex.org/W10581632","https://openalex.org/W2108985546","https://openalex.org/W2081919107"],"abstract_inverted_index":{"Prominent":[0],"methods":[1,22],"based":[2],"on":[3,122],"Tacotron2":[4],"and":[5,47,55,71,102,130,138,141],"advanced":[6],"models":[7],"have":[8],"improved":[9],"the":[10,31,75,81,86,88,96,100,112,116,123,128,133,142,145],"quality":[11],"of":[12,85,115,132,144],"synthesized":[13,32],"speech.":[14,57],"However,":[15],"most":[16],"data-driven":[17],"Text-":[18],"To-Speech":[19],"(TTS)":[20],"synthesis":[21],"only":[23],"aim":[24],"to":[25,51,59,68,94],"achieve":[26],"reasonable":[27],"neutral":[28],"prosody,":[29],"so":[30],"speech":[33],"is":[34,92,108,148],"less":[35],"expressive.":[36],"In":[37],"this":[38],"paper,":[39],"a":[40],"method":[41],"was":[42],"proposed":[43],"which":[44],"fuses":[45],"acoustic":[46,61,64,117],"text":[48,101],"emotional":[49],"features":[50,84],"produce":[52],"more":[53],"vivid":[54],"realistic":[56],"Specifically,":[58],"obtain":[60,80],"features,":[62],"two":[63],"encoders":[65],"are":[66,136],"leveraged":[67],"extract":[69,95],"utterance-level":[70],"phoneme-level":[72],"vectors":[73],"from":[74,99],"target":[76],"speech,":[77],"respectively.":[78],"To":[79],"objective":[82],"sentiment":[83,89,97],"text,":[87],"analysis":[90],"model":[91],"exploited":[93],"vector":[98,107,114],"expand":[103],"it.":[104],"The":[105,119],"expanded":[106],"feature-":[109],"fused":[110],"with":[111],"output":[113],"model.":[118],"experimental":[120],"results":[121],"LJSpeech":[124],"dataset":[125],"show":[126],"that":[127],"naturalness":[129],"expressiveness":[131],"MOS":[134],"score":[135,147],"3.63":[137],"3.45,":[139],"respectively,":[140],"similarity":[143],"SMOS":[146],"4.14.":[149]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
