{"id":"https://openalex.org/W3094785744","doi":"https://doi.org/10.21437/interspeech.2020-2985","title":"Multi-Reference Neural TTS Stylization with Adversarial Cycle Consistency","display_name":"Multi-Reference Neural TTS Stylization with Adversarial Cycle Consistency","publication_year":2020,"publication_date":"2020-10-25","ids":{"openalex":"https://openalex.org/W3094785744","doi":"https://doi.org/10.21437/interspeech.2020-2985","mag":"3094785744"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2020-2985","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-2985","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059609203","display_name":"Matt Whitehill","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Matt Whitehill","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102021059","display_name":"Shuang Ma","orcid":"https://orcid.org/0000-0002-9870-5492"},"institutions":[{"id":"https://openalex.org/I63190737","display_name":"University at Buffalo, State University of New York","ror":"https://ror.org/01y64my43","country_code":"US","type":"education","lineage":["https://openalex.org/I63190737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuang Ma","raw_affiliation_strings":["University of Buffalo"],"affiliations":[{"raw_affiliation_string":"University of Buffalo","institution_ids":["https://openalex.org/I63190737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100681741","display_name":"Daniel McDuff","orcid":"https://orcid.org/0000-0001-7313-0082"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]},{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Daniel McDuff","raw_affiliation_strings":["University of Washington","Microsoft Research"],"affiliations":[{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]},{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100566791","display_name":"Yale Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yale Song","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5059609203"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.7884,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.94312047,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4442","last_page":"4446"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7786029577255249},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7035466432571411},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6876004934310913},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5682656168937683},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.540895938873291},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.49987149238586426}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7786029577255249},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7035466432571411},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6876004934310913},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5682656168937683},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.540895938873291},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.49987149238586426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2020-2985","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-2985","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1882958252","https://openalex.org/W2120708938","https://openalex.org/W2120847449","https://openalex.org/W2187089797","https://openalex.org/W2519091744","https://openalex.org/W2794490148","https://openalex.org/W2808706139","https://openalex.org/W2906797124","https://openalex.org/W2932022923","https://openalex.org/W2962793481","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W3010916717","https://openalex.org/W4295731579"],"related_works":["https://openalex.org/W2502115930","https://openalex.org/W4246396837","https://openalex.org/W3176240006","https://openalex.org/W3126451824","https://openalex.org/W2482350142","https://openalex.org/W1561927205","https://openalex.org/W3191453585","https://openalex.org/W4297672492","https://openalex.org/W4288019534","https://openalex.org/W3192589309"],"abstract_inverted_index":{"Current":[0],"multi-reference":[1],"style":[2,20,26,33,67,120,145],"transfer":[3,34,98,121],"models":[4,28],"for":[5,22,35],"Text-to-Speech":[6],"(TTS)":[7],"perform":[8],"sub-optimally":[9],"on":[10,123],"disjoints":[11],"datasets,":[12],"where":[13],"one":[14,23],"dataset":[15,102,108],"contains":[16],"only":[17,110],"a":[18,101,107,111,116],"single":[19,112],"class":[21],"of":[24,63],"the":[25,36,42,61,82,87,143,147],"dimensions.These":[27],"generally":[29],"fail":[30],"to":[31,59,85,97,106,142],"produce":[32],"dimension":[37],"that":[38],"is":[39],"underrepresented":[40],"in":[41,115,119,129],"dataset.In":[43],"this":[44,95],"paper,":[45],"we":[46,70],"propose":[47],"an":[48],"adversarial":[49,91],"cycle":[50,92],"consistency":[51],"training":[52],"scheme":[53],"with":[54,74,109,126],"paired":[55],"and":[56,80,131],"unpaired":[57,72],"triplets":[58,73],"ensure":[60],"use":[62,94],"information":[64],"from":[65,100],"all":[66],"dimensions.During":[68],"training,":[69],"incorporate":[71],"randomly":[75],"selected":[76],"reference":[77,144],"audio":[78],"samples":[79,150],"encourage":[81],"synthesized":[83],"speech":[84,149],"preserve":[86],"appropriate":[88],"styles":[89],"using":[90],"consistency.We":[93],"method":[96,136],"emotion":[99,124],"containing":[103],"four":[104],"emotions":[105],"emotion.This":[113],"results":[114],"78%":[117],"improvement":[118],"(based":[122],"classification)":[125],"minimal":[127],"reduction":[128],"fidelity":[130],"naturalness.In":[132],"subjective":[133],"evaluations":[134],"our":[135],"was":[137],"consistently":[138],"rated":[139],"as":[140],"closer":[141],"than":[146],"baseline.Synthesized":[148],"are":[151],"available":[152],"at:":[153],"https://sites.google.":[154]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
