{"id":"https://openalex.org/W4385822623","doi":"https://doi.org/10.21437/interspeech.2023-1032","title":"A Neural TTS System with Parallel Prosody Transfer from Unseen Speakers","display_name":"A Neural TTS System with Parallel Prosody Transfer from Unseen Speakers","publication_year":2023,"publication_date":"2023-08-14","ids":{"openalex":"https://openalex.org/W4385822623","doi":"https://doi.org/10.21437/interspeech.2023-1032"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2023-1032","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-1032","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2309.11487","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035109881","display_name":"Slava Shechtman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Slava Shechtman","raw_affiliation_strings":["IBM Research AI"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003690515","display_name":"Raul Castro Fernandez","orcid":"https://orcid.org/0000-0001-7675-6080"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Raul Fernandez","raw_affiliation_strings":["IBM Research AI"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5003690515"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3554,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.65764269,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4853","last_page":"4857"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.8032625913619995},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7134448289871216},{"id":"https://openalex.org/keywords/transfer","display_name":"Transfer (computing)","score":0.5657050609588623},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5397639274597168},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4637722373008728},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4544915556907654},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.0633726418018341}],"concepts":[{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.8032625913619995},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7134448289871216},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.5657050609588623},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5397639274597168},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4637722373008728},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4544915556907654},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0633726418018341}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2023-1032","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-1032","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2309.11487","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.11487","pdf_url":"https://arxiv.org/pdf/2309.11487","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2309.11487","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.11487","pdf_url":"https://arxiv.org/pdf/2309.11487","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4385822623.pdf","grobid_xml":"https://content.openalex.org/works/W4385822623.grobid-xml"},"referenced_works_count":16,"referenced_works":["https://openalex.org/W2515028311","https://openalex.org/W2604184139","https://openalex.org/W2733903155","https://openalex.org/W2747874407","https://openalex.org/W2963091184","https://openalex.org/W2964138190","https://openalex.org/W2964243274","https://openalex.org/W2972359262","https://openalex.org/W3022876224","https://openalex.org/W3091928890","https://openalex.org/W3101882441","https://openalex.org/W3144667183","https://openalex.org/W3198712562","https://openalex.org/W3202098869","https://openalex.org/W4295731579","https://openalex.org/W4297841509"],"related_works":["https://openalex.org/W2355553914","https://openalex.org/W149862513","https://openalex.org/W2347684782","https://openalex.org/W187117048","https://openalex.org/W4320472397","https://openalex.org/W2401269021","https://openalex.org/W2145654520","https://openalex.org/W2750037515","https://openalex.org/W4319862652","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Modern":[0],"neural":[1,82],"TTS":[2,42,55,83,120,130],"systems":[3,21],"are":[4],"capable":[5],"of":[6,17,33,138],"generating":[7],"natural":[8],"and":[9],"expressive":[10],"speech":[11,35,60,112],"when":[12],"provided":[13],"with":[14,25,57,86,122],"sufficient":[15],"amounts":[16],"training":[18],"data.":[19],"Such":[20],"can":[22,108],"be":[23,46],"equipped":[24,85],"prosody-control":[26],"functionality,":[27],"allowing":[28],"for":[29,71],"more":[30],"direct":[31],"shaping":[32],"the":[34,54,91,105,111,128],"output":[36],"at":[37],"inference":[38],"time.":[39],"In":[40,75],"some":[41],"applications,":[43],"it":[44],"may":[45],"desirable":[47],"to":[48,63,89,117],"have":[49],"an":[50,58,65,99],"option":[51],"that":[52,104],"guides":[53],"system":[56,84,107],"ad-hoc":[59],"recording":[61,97],"exemplar":[62],"impose":[64],"implicit":[66],"fine-grained,":[67],"user-preferred":[68],"prosodic":[69],"realization":[70],"certain":[72],"input":[73],"prompts.":[74],"this":[76],"work":[77],"we":[78],"present":[79],"a":[80,94,136],"first-of-its-kind":[81],"such":[87],"functionality":[88],"transfer":[90,110],"prosody":[92,113],"from":[93,98,114],"parallel":[95],"text":[96],"unseen":[100],"speaker.":[101],"We":[102],"demonstrate":[103],"proposed":[106],"precisely":[109],"novel":[115],"speakers":[116],"various":[118],"trained":[119],"voices":[121],"no":[123],"quality":[124],"degradation,":[125],"while":[126],"preserving":[127],"target":[129],"speakers'":[131],"identity,":[132],"as":[133],"evaluated":[134],"by":[135],"set":[137],"subjective":[139],"listening":[140],"experiments.":[141]},"counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
