{"id":"https://openalex.org/W4225264140","doi":"https://doi.org/10.1109/icassp43922.2022.9746179","title":"Cross-Speaker Style Transfer for Text-to-Speech Using Data Augmentation","display_name":"Cross-Speaker Style Transfer for Text-to-Speech Using Data Augmentation","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4225264140","doi":"https://doi.org/10.1109/icassp43922.2022.9746179"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746179","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746179","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109494794","display_name":"Manuel Sam Ribeiro","orcid":"https://orcid.org/0000-0001-8096-2231"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Manuel Sam Ribeiro","raw_affiliation_strings":["Amazon Alexa,TTS Research","TTS Research, Amazon Alexa"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,TTS Research","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"TTS Research, Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053794772","display_name":"Julian Roth","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Julian Roth","raw_affiliation_strings":["Amazon Alexa,TTS Research","TTS Research, Amazon Alexa"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,TTS Research","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"TTS Research, Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041378836","display_name":"Giulia Comini","orcid":"https://orcid.org/0000-0002-9391-6565"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Giulia Comini","raw_affiliation_strings":["Amazon Alexa,TTS Research","TTS Research, Amazon Alexa"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,TTS Research","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"TTS Research, Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058343897","display_name":"Goeric Huybrechts","orcid":"https://orcid.org/0000-0003-0222-3008"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Goeric Huybrechts","raw_affiliation_strings":["Amazon Alexa,TTS Research","TTS Research, Amazon Alexa"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,TTS Research","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"TTS Research, Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031116817","display_name":"Adam Gabry\u015b","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adam Gabrys","raw_affiliation_strings":["Amazon Alexa,TTS Research","TTS Research, Amazon Alexa"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,TTS Research","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"TTS Research, Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019530089","display_name":"Jaime Lorenzo-Trueba","orcid":"https://orcid.org/0000-0003-0459-1429"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaime Lorenzo-Trueba","raw_affiliation_strings":["Amazon Alexa,TTS Research","TTS Research, Amazon Alexa"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,TTS Research","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"TTS Research, Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5109494794"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":1.5646,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.84795972,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"6797","last_page":"6801"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.746724009513855},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6289267539978027},{"id":"https://openalex.org/keywords/style","display_name":"Style (visual arts)","score":0.627121090888977},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.49555695056915283},{"id":"https://openalex.org/keywords/transfer","display_name":"Transfer (computing)","score":0.4945676028728485},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4592222571372986},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.45315858721733093},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35332274436950684},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.0544360876083374}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.746724009513855},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6289267539978027},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.627121090888977},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.49555695056915283},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.4945676028728485},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4592222571372986},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45315858721733093},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35332274436950684},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0544360876083374},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746179","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746179","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1959608418","https://openalex.org/W2133564696","https://openalex.org/W2187089797","https://openalex.org/W2749651610","https://openalex.org/W2794490148","https://openalex.org/W2795109282","https://openalex.org/W2904459034","https://openalex.org/W2932022923","https://openalex.org/W2945478979","https://openalex.org/W2954386831","https://openalex.org/W2962788625","https://openalex.org/W2964243274","https://openalex.org/W3007067948","https://openalex.org/W3015645837","https://openalex.org/W3015805741","https://openalex.org/W3022876224","https://openalex.org/W3083565479","https://openalex.org/W3094785744","https://openalex.org/W3096457008","https://openalex.org/W3135418837","https://openalex.org/W3139170550","https://openalex.org/W3161492781","https://openalex.org/W3161890269","https://openalex.org/W3162770051","https://openalex.org/W3174758275","https://openalex.org/W3194613004","https://openalex.org/W3195171908","https://openalex.org/W3195366750","https://openalex.org/W3198104520","https://openalex.org/W3198712562","https://openalex.org/W4295731579","https://openalex.org/W6640963894","https://openalex.org/W6679434410","https://openalex.org/W6750489868","https://openalex.org/W6760861152","https://openalex.org/W6762533536","https://openalex.org/W6796730497","https://openalex.org/W6797157791"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W4313854686","https://openalex.org/W4389984014","https://openalex.org/W2144208207","https://openalex.org/W1509309911","https://openalex.org/W1599425004","https://openalex.org/W2118860825","https://openalex.org/W2096510939"],"abstract_inverted_index":{"We":[0,17,98,128,146],"address":[1],"the":[2,52,69,86,178],"problem":[3],"of":[4,23,71,125,138,156,173],"cross-speaker":[5],"style":[6,174],"transfer":[7],"for":[8],"text-to-speech":[9],"(TTS)":[10],"using":[11,113,142],"data":[12,26,35,67,78,84],"augmentation":[13],"via":[14],"voice":[15,61,76],"conversion.":[16],"assume":[18],"to":[19,42,63,91,153],"have":[20],"a":[21,28,44,93,122,154],"corpus":[22],"neutral":[24],"non-expressive":[25],"from":[27,36,68,85],"target":[29,53,87,179],"speaker":[30,88],"and":[31,89,107],"supporting":[32,72,117,126,144],"conversational":[33],"expressive":[34,73],"different":[37],"speakers.":[38,74,145],"Our":[39],"goal":[40],"is":[41,48,79,104,111],"build":[43],"TTS":[45,96],"system":[46],"that":[47,101,132,164],"expressive,":[49],"while":[50,176],"retaining":[51,177],"speaker\u2019s":[54,180],"identity.":[55,181],"The":[56,75,109],"proposed":[57,151],"approach":[58,103,134],"relies":[59],"on":[60],"conversion":[62],"first":[64],"generate":[65],"high-quality":[66],"set":[70,155],"converted":[77],"then":[80],"pooled":[81],"with":[82],"natural":[83],"used":[90],"train":[92],"single-speaker":[94],"multi-style":[95],"system.":[97],"provide":[99,130],"evidence":[100,131],"this":[102,133],"efficient,":[105],"flexible,":[106],"scalable.":[108],"method":[110],"evaluated":[112],"one":[114],"or":[115],"more":[116],"speakers,":[118],"as":[119,121],"well":[120],"variable":[123],"amount":[124],"data.":[127],"further":[129],"allows":[135],"some":[136],"controllability":[137],"speaking":[139],"style,":[140],"when":[141],"multiple":[143],"conclude":[147],"by":[148],"scaling":[149],"our":[150,165],"technology":[152,166],"14":[157],"speakers":[158],"across":[159],"7":[160],"languages.":[161],"Results":[162],"indicate":[163],"consistently":[167],"improves":[168],"synthetic":[169],"samples":[170],"in":[171],"terms":[172],"similarity,":[175]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-30T09:15:22.047038","created_date":"2025-10-10T00:00:00"}
