{"id":"https://openalex.org/W1966083358","doi":"https://doi.org/10.1109/jstsp.2013.2295058","title":"Building HMM-TTS Voices on Diverse Data","display_name":"Building HMM-TTS Voices on Diverse Data","publication_year":2014,"publication_date":"2014-01-31","ids":{"openalex":"https://openalex.org/W1966083358","doi":"https://doi.org/10.1109/jstsp.2013.2295058","mag":"1966083358"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2013.2295058","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2013.2295058","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031222393","display_name":"Vincent Wan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143477","display_name":"Toshiba (United Kingdom)","ror":"https://ror.org/054hmd463","country_code":"GB","type":"company","lineage":["https://openalex.org/I1292669757","https://openalex.org/I4210143477"]},{"id":"https://openalex.org/I1292669757","display_name":"Toshiba (Japan)","ror":"https://ror.org/0326v3z14","country_code":"JP","type":"company","lineage":["https://openalex.org/I1292669757"]}],"countries":["GB","JP"],"is_corresponding":true,"raw_author_name":"Vincent Wan","raw_affiliation_strings":["Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I4210143477"]},{"raw_affiliation_string":"Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK","institution_ids":["https://openalex.org/I1292669757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071418947","display_name":"Javier Latorre","orcid":null},"institutions":[{"id":"https://openalex.org/I1292669757","display_name":"Toshiba (Japan)","ror":"https://ror.org/0326v3z14","country_code":"JP","type":"company","lineage":["https://openalex.org/I1292669757"]},{"id":"https://openalex.org/I4210143477","display_name":"Toshiba (United Kingdom)","ror":"https://ror.org/054hmd463","country_code":"GB","type":"company","lineage":["https://openalex.org/I1292669757","https://openalex.org/I4210143477"]}],"countries":["GB","JP"],"is_corresponding":false,"raw_author_name":"Javier Latorre","raw_affiliation_strings":["Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I4210143477"]},{"raw_affiliation_string":"Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK","institution_ids":["https://openalex.org/I1292669757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047287498","display_name":"Kayoko Yanagisawa","orcid":"https://orcid.org/0000-0002-3444-7287"},"institutions":[{"id":"https://openalex.org/I4210143477","display_name":"Toshiba (United Kingdom)","ror":"https://ror.org/054hmd463","country_code":"GB","type":"company","lineage":["https://openalex.org/I1292669757","https://openalex.org/I4210143477"]},{"id":"https://openalex.org/I1292669757","display_name":"Toshiba (Japan)","ror":"https://ror.org/0326v3z14","country_code":"JP","type":"company","lineage":["https://openalex.org/I1292669757"]}],"countries":["GB","JP"],"is_corresponding":false,"raw_author_name":"Kayoko Yanagisawa","raw_affiliation_strings":["Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I4210143477"]},{"raw_affiliation_string":"Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK","institution_ids":["https://openalex.org/I1292669757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075928867","display_name":"Norbert Braunschweiler","orcid":"https://orcid.org/0000-0003-1388-440X"},"institutions":[{"id":"https://openalex.org/I1292669757","display_name":"Toshiba (Japan)","ror":"https://ror.org/0326v3z14","country_code":"JP","type":"company","lineage":["https://openalex.org/I1292669757"]},{"id":"https://openalex.org/I4210143477","display_name":"Toshiba (United Kingdom)","ror":"https://ror.org/054hmd463","country_code":"GB","type":"company","lineage":["https://openalex.org/I1292669757","https://openalex.org/I4210143477"]}],"countries":["GB","JP"],"is_corresponding":false,"raw_author_name":"Norbert Braunschweiler","raw_affiliation_strings":["Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I4210143477"]},{"raw_affiliation_string":"Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK","institution_ids":["https://openalex.org/I1292669757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070433127","display_name":"Langzhou Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143477","display_name":"Toshiba (United Kingdom)","ror":"https://ror.org/054hmd463","country_code":"GB","type":"company","lineage":["https://openalex.org/I1292669757","https://openalex.org/I4210143477"]},{"id":"https://openalex.org/I1292669757","display_name":"Toshiba (Japan)","ror":"https://ror.org/0326v3z14","country_code":"JP","type":"company","lineage":["https://openalex.org/I1292669757"]}],"countries":["GB","JP"],"is_corresponding":false,"raw_author_name":"Langzhou Chen","raw_affiliation_strings":["Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I4210143477"]},{"raw_affiliation_string":"Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK","institution_ids":["https://openalex.org/I1292669757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050766679","display_name":"Mark Gales","orcid":"https://orcid.org/0000-0002-5311-8219"},"institutions":[{"id":"https://openalex.org/I4210143477","display_name":"Toshiba (United Kingdom)","ror":"https://ror.org/054hmd463","country_code":"GB","type":"company","lineage":["https://openalex.org/I1292669757","https://openalex.org/I4210143477"]},{"id":"https://openalex.org/I1292669757","display_name":"Toshiba (Japan)","ror":"https://ror.org/0326v3z14","country_code":"JP","type":"company","lineage":["https://openalex.org/I1292669757"]}],"countries":["GB","JP"],"is_corresponding":false,"raw_author_name":"Mark J. F. Gales","raw_affiliation_strings":["Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Toshiba Research Europe Ltd, Speech Technology Group, Cambridge Research Laboratory, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I4210143477"]},{"raw_affiliation_string":"Speech Technol. Group, Toshiba Res. Eur. Ltd., Cambridge, UK","institution_ids":["https://openalex.org/I1292669757"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112250968","display_name":"Masami Akamine","orcid":null},"institutions":[{"id":"https://openalex.org/I1292669757","display_name":"Toshiba (Japan)","ror":"https://ror.org/0326v3z14","country_code":"JP","type":"company","lineage":["https://openalex.org/I1292669757"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masami Akamine","raw_affiliation_strings":["Toshiba Corporation, Corporate Research and Development Center, Saiwai-ku, Kawasaki, Japan","Corp. R&DCenter, Toshiba Corp., Kawasaki, Japan"],"affiliations":[{"raw_affiliation_string":"Toshiba Corporation, Corporate Research and Development Center, Saiwai-ku, Kawasaki, Japan","institution_ids":["https://openalex.org/I1292669757"]},{"raw_affiliation_string":"Corp. R&DCenter, Toshiba Corp., Kawasaki, Japan","institution_ids":["https://openalex.org/I1292669757"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5031222393"],"corresponding_institution_ids":["https://openalex.org/I1292669757","https://openalex.org/I4210143477"],"apc_list":null,"apc_paid":null,"fwci":1.6914,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.86979913,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"8","issue":"2","first_page":"296","last_page":"306"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.8290894031524658},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7981303930282593},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.642299473285675},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6387324929237366},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.6303478479385376},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6290445327758789},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3772296607494354}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.8290894031524658},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7981303930282593},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.642299473285675},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6387324929237366},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.6303478479385376},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6290445327758789},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3772296607494354},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/jstsp.2013.2295058","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2013.2295058","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},{"id":"pmh:oai:generic.eprints.org:632501","is_oa":false,"landing_page_url":"http://publications.eng.cam.ac.uk/632501/","pdf_url":null,"source":{"id":"https://openalex.org/S4406922847","display_name":"Cambridge University Engineering Department Publications Database","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7599999904632568,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W35069904","https://openalex.org/W80543058","https://openalex.org/W102093577","https://openalex.org/W126988493","https://openalex.org/W330074099","https://openalex.org/W1512429158","https://openalex.org/W1517939602","https://openalex.org/W1523817494","https://openalex.org/W2005768155","https://openalex.org/W2025638820","https://openalex.org/W2039800941","https://openalex.org/W2069631319","https://openalex.org/W2091430793","https://openalex.org/W2092144778","https://openalex.org/W2095437083","https://openalex.org/W2131650634","https://openalex.org/W2142980177","https://openalex.org/W2143361917","https://openalex.org/W2153914468","https://openalex.org/W2158086337","https://openalex.org/W2165143604","https://openalex.org/W2166823384","https://openalex.org/W2269105564","https://openalex.org/W2283817422","https://openalex.org/W2292984643","https://openalex.org/W2296111744","https://openalex.org/W2740537351","https://openalex.org/W6603264027","https://openalex.org/W6611303605","https://openalex.org/W6630838124","https://openalex.org/W6673436812","https://openalex.org/W6680838236","https://openalex.org/W6681160539"],"related_works":["https://openalex.org/W2793122029","https://openalex.org/W3128571556","https://openalex.org/W2149112655","https://openalex.org/W2374918184","https://openalex.org/W2558302074","https://openalex.org/W2152415671","https://openalex.org/W1664345252","https://openalex.org/W2125057358","https://openalex.org/W2121267120","https://openalex.org/W2912239156"],"abstract_inverted_index":{"The":[0],"statistical":[1],"models":[2,45,86],"of":[3,42,51,133,150,153],"hidden":[4],"Markov":[5],"model":[6,165],"based":[7],"text-to-speech":[8],"(HMM-TTS)":[9],"systems":[10],"are":[11],"typically":[12],"built":[13],"using":[14,66,168],"homogeneous":[15],"data.":[16,155],"It":[17],"is":[18,106,158,162],"possible":[19],"to":[20,31,62,108,128,135,164],"acquire":[21],"data":[22,70,77,89,100,127,167],"from":[23,79],"many":[24],"different":[25],"sources":[26],"but":[27],"combining":[28],"them":[29],"leads":[30],"a":[32,48,110,113],"non-homogeneous":[33],"or":[34],"diverse":[35,67,88],"dataset.":[36],"This":[37],"paper":[38],"describes":[39],"the":[40,80,130,151,166],"application":[41,50],"average":[43],"voice":[44,111],"(AVMs)":[46],"and":[47,84],"novel":[49],"cluster":[52],"adaptive":[53],"training":[54,95],"(CAT)":[55],"with":[56,75,116],"multiple":[57,169],"context":[58,170],"dependent":[59],"decision":[60,172],"trees":[61],"create":[63,109],"HMM-TTS":[64],"voices":[65,146],"data:":[68],"speech":[69,76,93],"recorded":[71],"in":[72],"studios":[73],"mixed":[74],"obtained":[78],"internet.":[81],"Training":[82],"AVM":[83,123],"CAT":[85,105,142],"on":[87,96],"yields":[90],"better":[91],"quality":[92,98,145],"than":[94,147],"high":[97],"studio":[99],"alone.":[101],"Tests":[102,138],"show":[103,140],"that":[104,141,160],"able":[107],"for":[112],"target":[114,136],"speaker":[115],"as":[117,119],"little":[118],"7":[120],"seconds;":[121],"an":[122],"would":[124],"need":[125],"more":[126],"reach":[129],"same":[131],"level":[132],"similarity":[134],"speaker.":[137],"also":[139],"produces":[143],"higher":[144],"AVMs":[148],"irrespective":[149],"amount":[152],"adaptation":[154],"Lastly,":[156],"it":[157,161],"shown":[159],"beneficial":[163],"clustering":[171],"trees.":[173]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
