{"id":"https://openalex.org/W2913271971","doi":"https://doi.org/10.1109/slt.2018.8639550","title":"Neural TTS Voice Conversion","display_name":"Neural TTS Voice Conversion","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2913271971","doi":"https://doi.org/10.1109/slt.2018.8639550","mag":"2913271971"},"language":"en","primary_location":{"id":"doi:10.1109/slt.2018.8639550","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2018.8639550","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054462082","display_name":"Zvi Kons","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zvi Kons","raw_affiliation_strings":["IBM Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035109881","display_name":"Slava Shechtman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Slava Shechtman","raw_affiliation_strings":["IBM Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050096191","display_name":"Alex Sorin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alex Sorin","raw_affiliation_strings":["IBM Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053864983","display_name":"Ron Hoory","orcid":"https://orcid.org/0009-0006-1327-5160"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ron Hoory","raw_affiliation_strings":["IBM Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021404609","display_name":"Carmel Rabinovitz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Carmel Rabinovitz","raw_affiliation_strings":["IBM Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079262240","display_name":"Edmilson Morais","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Edmilson Da Silva Morais","raw_affiliation_strings":["IBM Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3516,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.86406488,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"290","last_page":"296"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8142780065536499},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7256429195404053},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.7074640989303589},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.6821379661560059},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6428610682487488},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5707909464836121},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5618537068367004},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5313304662704468},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5171725153923035},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4929811954498291},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.4606369435787201},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36702677607536316},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3576371967792511},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.09272465109825134},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.089793860912323}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8142780065536499},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7256429195404053},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.7074640989303589},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.6821379661560059},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6428610682487488},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5707909464836121},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5618537068367004},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5313304662704468},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5171725153923035},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4929811954498291},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.4606369435787201},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36702677607536316},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3576371967792511},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09272465109825134},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.089793860912323},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt.2018.8639550","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2018.8639550","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Gender equality","score":0.5,"id":"https://metadata.un.org/sdg/5"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1959608418","https://openalex.org/W2396931946","https://openalex.org/W2471520273","https://openalex.org/W2515028311","https://openalex.org/W2619368999","https://openalex.org/W2788357188","https://openalex.org/W2808706139","https://openalex.org/W2962739369","https://openalex.org/W2963192573","https://openalex.org/W2963432880","https://openalex.org/W2963534259","https://openalex.org/W2963691546","https://openalex.org/W2963712897","https://openalex.org/W2964243274","https://openalex.org/W4298174729","https://openalex.org/W4298857617","https://openalex.org/W6640963894","https://openalex.org/W6712324455","https://openalex.org/W6738277540","https://openalex.org/W6745569068","https://openalex.org/W6748573829","https://openalex.org/W6748588790","https://openalex.org/W6749489859","https://openalex.org/W6752888775","https://openalex.org/W6936113694"],"related_works":["https://openalex.org/W1927421023","https://openalex.org/W10581632","https://openalex.org/W3149582125","https://openalex.org/W157238252","https://openalex.org/W2169632867","https://openalex.org/W2465421051","https://openalex.org/W152045069","https://openalex.org/W1992908141","https://openalex.org/W652196294","https://openalex.org/W2368700418"],"abstract_inverted_index":{"Recently,":[0],"speaker":[1,42,77,140],"adaptation":[2,25],"of":[3,21,26,40,52,75,154],"neural":[4,119],"TTS":[5,53,80,101,120],"models":[6,147],"received":[7],"significant":[8],"interest,":[9],"and":[10,66,126,157,161,167],"several":[11],"studies":[12],"focusing":[13],"on":[14,32,48,62,134],"this":[15,44],"topic":[16],"have":[17,107],"been":[18],"published.":[19],"All":[20],"them":[22],"explore":[23],"an":[24,27,57,162],"initial":[28,58],"multi-speaker":[29,91],"model":[30],"trained":[31,61,133],"a":[33,49,63,73,85,118,135,152],"corpus":[34],"containing":[35],"from":[36],"tens":[37],"to":[38,69,72,106,151],"hundreds":[39],"individual":[41],"voices.In":[43],"work":[45],"we":[46],"focus":[47],"challenging":[50],"task":[51],"voice":[54,81,166],"conversion":[55,82],"where":[56],"system":[59,121,131,146],"is":[60,104,132],"single-speaker":[64,112],"data":[65],"then":[67],"need":[68],"be":[70,94,149],"adapted":[71],"variety":[74,153],"external":[76,155],"voices.":[78],"The":[79,130],"setup":[83],"represents":[84],"very":[86],"important":[87],"use":[88],"case.":[89],"Transcribed":[90],"datasets":[92],"might":[93],"unavailable":[95],"for":[96],"many":[97],"languages":[98],"while":[99],"any":[100],"technology":[102],"provider":[103],"expected":[105],"at":[108],"least":[109],"one":[110],"suitable":[111],"dataset":[113],"per":[114],"supported":[115],"language.We":[116],"present":[117,168],"comprising":[122],"separate":[123],"prosody":[124],"generator":[125],"synthesizer":[127],"DNN":[128],"models.":[129],"high":[136],"quality":[137],"proprietary":[138],"male":[139,156],"dataset.":[141],"We":[142],"show":[143],"that":[144],"the":[145],"can":[148],"converted":[150],"female":[158],"ordinary":[159],"voices":[160],"extremely":[163],"expressive":[164],"artist's":[165],"crowd-base":[169],"subjective":[170],"evaluation":[171],"results.":[172]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
