{"id":"https://openalex.org/W2996797022","doi":"https://doi.org/10.1109/lsp.2019.2961213","title":"Voice Conversion for Whispered Speech Synthesis","display_name":"Voice Conversion for Whispered Speech Synthesis","publication_year":2019,"publication_date":"2019-12-24","ids":{"openalex":"https://openalex.org/W2996797022","doi":"https://doi.org/10.1109/lsp.2019.2961213","mag":"2996797022"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2019.2961213","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2019.2961213","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1912.05289","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Marius Cotescu","orcid":"https://orcid.org/0000-0001-7005-2920"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Marius Cotescu","raw_affiliation_strings":["TTS Research, Amazon.com Inc, Cambridge, U.K"],"raw_orcid":"https://orcid.org/0000-0001-7005-2920","affiliations":[{"raw_affiliation_string":"TTS Research, Amazon.com Inc, Cambridge, U.K","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Thomas Drugman","orcid":"https://orcid.org/0000-0002-1491-7878"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas Drugman","raw_affiliation_strings":["TTS Research, Amazon.com Inc, Cambridge, U.K"],"raw_orcid":"https://orcid.org/0000-0002-1491-7878","affiliations":[{"raw_affiliation_string":"TTS Research, Amazon.com Inc, Cambridge, U.K","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Goeric Huybrechts","orcid":"https://orcid.org/0000-0003-0222-3008"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Goeric Huybrechts","raw_affiliation_strings":["TTS Research, Amazon.com Inc, Cambridge, U.K"],"raw_orcid":"https://orcid.org/0000-0003-0222-3008","affiliations":[{"raw_affiliation_string":"TTS Research, Amazon.com Inc, Cambridge, U.K","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jaime Lorenzo-Trueba","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaime Lorenzo-Trueba","raw_affiliation_strings":["TTS Research, Amazon.com Inc, Cambridge, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"TTS Research, Amazon.com Inc, Cambridge, U.K","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":null,"display_name":"Alexis Moinet","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexis Moinet","raw_affiliation_strings":["TTS Research, Amazon.com Inc, Cambridge, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"TTS Research, Amazon.com Inc, Cambridge, U.K","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":1.7339,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.89035931,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"27","issue":null,"first_page":"186","last_page":"190"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6165000200271606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6165000200271606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.22210000455379486,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.04050000011920929,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.906000018119812},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.548799991607666},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5322999954223633},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.5291000008583069},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.5120000243186951},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5076000094413757},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4964999854564667},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4510999917984009}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.906000018119812},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7972999811172485},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7376999855041504},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.548799991607666},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5322999954223633},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.5291000008583069},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.5120000243186951},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5076000094413757},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4964999854564667},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4510999917984009},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.44449999928474426},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44119998812675476},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4115999937057495},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.39559999108314514},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3824000060558319},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.382099986076355},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.37299999594688416},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.35510000586509705},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.33000001311302185},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.3269999921321869},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.32269999384880066},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.29409998655319214},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.2531999945640564}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lsp.2019.2961213","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2019.2961213","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1912.05289","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1912.05289","pdf_url":"https://arxiv.org/pdf/1912.05289","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1912.05289","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1912.05289","pdf_url":"https://arxiv.org/pdf/1912.05289","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1509691205","https://openalex.org/W1988133680","https://openalex.org/W2000513720","https://openalex.org/W2002222875","https://openalex.org/W2006033171","https://openalex.org/W2011440797","https://openalex.org/W2021512166","https://openalex.org/W2057609679","https://openalex.org/W2087335130","https://openalex.org/W2096980176","https://openalex.org/W2099076569","https://openalex.org/W2100974241","https://openalex.org/W2105160541","https://openalex.org/W2120605154","https://openalex.org/W2156142001","https://openalex.org/W2159089611","https://openalex.org/W2312732917","https://openalex.org/W2327037637","https://openalex.org/W2471520273","https://openalex.org/W2739735615","https://openalex.org/W2806000759","https://openalex.org/W2806188042","https://openalex.org/W2811001736","https://openalex.org/W2889417860","https://openalex.org/W2889933531","https://openalex.org/W2899877258","https://openalex.org/W2903365642","https://openalex.org/W2962896155","https://openalex.org/W2963035245","https://openalex.org/W2963539064","https://openalex.org/W2964069186","https://openalex.org/W6601701123","https://openalex.org/W6631190155","https://openalex.org/W6652693351","https://openalex.org/W6674330103","https://openalex.org/W6681151457","https://openalex.org/W6719858163","https://openalex.org/W6754095682","https://openalex.org/W6777926273","https://openalex.org/W7008777630"],"related_works":[],"abstract_inverted_index":{"We":[0,27,54,75,103,123],"present":[1],"an":[2,65],"approach":[3],"to":[4,19,24,39,111],"synthesize":[5],"whisper":[6,63,101],"by":[7],"applying":[8,78],"a":[9],"handcrafted":[10],"signal":[11,87],"processing":[12,88],"recipe":[13],"and":[14,34,49,57,68,90,143],"Voice":[15],"Conversion":[16],"(VC)":[17],"techniques":[18,80],"convert":[20],"normally":[21],"phonated":[22],"speech":[23,48],"whispered":[25,52],"speech.":[26,53],"investigate":[28,104],"using":[29,85],"Gaussian":[30],"Mixture":[31],"Models":[32],"(GMM)":[33],"Deep":[35],"Neural":[36],"Networks":[37],"(DNN)":[38],"model":[40,110],"the":[41,61,70,105,108,127,131,140,147,157],"mapping":[42],"between":[43],"acoustic":[44],"features":[45],"of":[46,51,60,99,107,146,162],"normal":[47],"those":[50],"evaluate":[55],"naturalness":[56,142],"speaker":[58,129,144],"similarity":[59,145],"converted":[62,148],"on":[64,69,113,139],"internal":[66],"corpus":[67],"publicly":[71],"available":[72],"wTIMIT":[73],"corpus.":[74],"show":[76,124],"that":[77,94,125],"VC":[79],"is":[81,154],"significantly":[82],"better":[83],"than":[84],"rule-based":[86],"methods":[89],"it":[91],"achieves":[92],"results":[93],"are":[95],"indistinguishable":[96],"from":[97,120,130],"copy-synthesis":[98],"natural":[100],"recordings.":[102],"ability":[106],"DNN":[109,152],"generalize":[112],"unseen":[114],"speakers,":[115],"when":[116],"trained":[117],"with":[118],"data":[119],"multiple":[121],"speakers.":[122],"excluding":[126],"target":[128],"training":[132],"set":[133],"has":[134],"little":[135],"or":[136],"no":[137],"impact":[138],"perceived":[141],"whisper.":[149],"The":[150],"proposed":[151],"method":[153],"used":[155],"in":[156],"newly":[158],"released":[159],"Whisper":[160],"Mode":[161],"Amazon":[163],"Alexa.":[164]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2020-01-10T00:00:00"}
