{"id":"https://openalex.org/W3135547455","doi":"https://doi.org/10.1109/iscslp49672.2021.9362098","title":"Estimating Mutual Information in Prosody Representation for Emotional Prosody Transfer in Speech Synthesis","display_name":"Estimating Mutual Information in Prosody Representation for Emotional Prosody Transfer in Speech Synthesis","publication_year":2021,"publication_date":"2021-01-24","ids":{"openalex":"https://openalex.org/W3135547455","doi":"https://doi.org/10.1109/iscslp49672.2021.9362098","mag":"3135547455"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp49672.2021.9362098","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp49672.2021.9362098","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 12th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102854878","display_name":"Guangyan Zhang","orcid":"https://orcid.org/0000-0002-3480-5902"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guangyan Zhang","raw_affiliation_strings":["The Chinese University of Hong Kong,Department of Electronic Engineering","Department of Electronic Engineering, The Chinese University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Department of Electronic Engineering","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Department of Electronic Engineering, The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058971129","display_name":"Shirong Qiu","orcid":"https://orcid.org/0000-0003-0238-8312"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shirong Qiu","raw_affiliation_strings":["The Chinese University of Hong Kong,Department of Electronic Engineering","Department of Electronic Engineering, The Chinese University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Department of Electronic Engineering","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Department of Electronic Engineering, The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013250925","display_name":"Ying Qin","orcid":"https://orcid.org/0000-0003-4606-7174"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Qin","raw_affiliation_strings":["The Chinese University of Hong Kong,Department of Electronic Engineering","Department of Electronic Engineering, The Chinese University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Department of Electronic Engineering","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Department of Electronic Engineering, The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001795601","display_name":"Tan Lee","orcid":"https://orcid.org/0000-0002-7089-3436"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tan Lee","raw_affiliation_strings":["The Chinese University of Hong Kong,Department of Electronic Engineering","Department of Electronic Engineering, The Chinese University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Department of Electronic Engineering","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Department of Electronic Engineering, The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102854878"],"corresponding_institution_ids":["https://openalex.org/I177725633"],"apc_list":null,"apc_paid":null,"fwci":1.0877,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.81161939,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.9439930319786072},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6834233999252319},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.632624626159668},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5609254240989685},{"id":"https://openalex.org/keywords/mutual-information","display_name":"Mutual information","score":0.5604513883590698},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43593645095825195},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.43057894706726074},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38510116934776306},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.12134385108947754}],"concepts":[{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.9439930319786072},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6834233999252319},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.632624626159668},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5609254240989685},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.5604513883590698},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43593645095825195},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.43057894706726074},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38510116934776306},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.12134385108947754},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp49672.2021.9362098","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp49672.2021.9362098","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 12th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W126988493","https://openalex.org/W198298781","https://openalex.org/W567437002","https://openalex.org/W588342931","https://openalex.org/W1570629387","https://openalex.org/W1875231349","https://openalex.org/W1931958196","https://openalex.org/W1966797434","https://openalex.org/W1971670143","https://openalex.org/W1976725440","https://openalex.org/W2003547693","https://openalex.org/W2006715603","https://openalex.org/W2043843997","https://openalex.org/W2069631319","https://openalex.org/W2069859485","https://openalex.org/W2087110403","https://openalex.org/W2091425152","https://openalex.org/W2136144249","https://openalex.org/W2149350210","https://openalex.org/W2171121512","https://openalex.org/W2191779130","https://openalex.org/W2398561585","https://openalex.org/W2576411688","https://openalex.org/W2794490148","https://openalex.org/W2795109282","https://openalex.org/W2803832867","https://openalex.org/W2890606114","https://openalex.org/W2907262790","https://openalex.org/W2939488497","https://openalex.org/W2963272440","https://openalex.org/W2963300588","https://openalex.org/W2963609956","https://openalex.org/W2963927338","https://openalex.org/W2972921407","https://openalex.org/W4243316134","https://openalex.org/W4251867726","https://openalex.org/W4254718357","https://openalex.org/W4295731579","https://openalex.org/W6605187413","https://openalex.org/W6608022165","https://openalex.org/W6639350448","https://openalex.org/W6713020357","https://openalex.org/W6750489868","https://openalex.org/W6752051073"],"related_works":["https://openalex.org/W2355553914","https://openalex.org/W149862513","https://openalex.org/W2347684782","https://openalex.org/W187117048","https://openalex.org/W4320472397","https://openalex.org/W2401269021","https://openalex.org/W2466816617","https://openalex.org/W2145654520","https://openalex.org/W2750037515","https://openalex.org/W4319862652"],"abstract_inverted_index":{"An":[0],"end-to-end":[1,32,103],"prosody":[2,10,66,83,98,146,160,176],"transfer":[3,7,177],"system":[4,33,104],"aims":[5],"to":[6,14,126,154,166],"the":[8,21,52,56,65,77,97,102,120,127,133,145,159,167],"speech":[9,25],"from":[11],"one":[12],"speaker":[13,43,110,156],"another":[15],"speaker.":[16],"One":[17],"major":[18],"application":[19],"is":[20,89,137],"generation":[22],"of":[23,38,54,122,170],"emotional":[24,175],"with":[26],"a":[27],"new":[28],"speaker's":[29],"voice.":[30],"The":[31,48,91,113],"uses":[34],"an":[35,171],"intermediate":[36],"representation":[37,99],"prosody,":[39],"which":[40,74],"encompasses":[41],"both":[42,107],"and":[44,61,85,109,173],"emotion":[45,60,108],"related":[46],"information.":[47],"present":[49],"study":[50],"tackles":[51],"problem":[53],"estimating":[55],"mutual":[57,69,78,114],"information":[58,70,79,115,143,157],"between":[59,80],"speaker-related":[62],"factors":[63],"in":[64,140,144,158],"representation.":[67,161],"A":[68],"neural":[71],"estimator":[72],"(MINE)":[73],"could":[75],"measure":[76],"high-dimensional":[81],"continuous":[82],"embedding":[84],"discrete":[86],"speaker/emotion":[87],"label":[88],"applied.":[90],"experimental":[92],"results":[93,163],"show":[94],"that:":[95],"1)":[96],"generated":[100],"by":[101,119],"indeed":[105],"contains":[106],"information;":[111],"2)":[112],"would":[116],"be":[117,152],"determined":[118],"type":[121],"input":[123],"acoustic":[124],"features":[125],"reference":[128],"encoder;":[129],"3)":[130],"normalization":[131],"for":[132],"log":[134],"F0":[135],"feature":[136],"very":[138],"effective":[139],"increasing":[141],"emotion-related":[142],"representation;":[147],"4)":[148],"adversarial":[149],"learning":[150],"can":[151],"applied":[153],"reduce":[155],"These":[162],"are":[164],"useful":[165],"further":[168],"development":[169],"optimal":[172],"practical":[174],"systems.":[178]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
