{"id":"https://openalex.org/W2168411176","doi":"https://doi.org/10.21437/interspeech.2010-174","title":"Roles of the average voice in speaker-adaptive HMM-based speech synthesis","display_name":"Roles of the average voice in speaker-adaptive HMM-based speech synthesis","publication_year":2010,"publication_date":"2010-09-26","ids":{"openalex":"https://openalex.org/W2168411176","doi":"https://doi.org/10.21437/interspeech.2010-174","mag":"2168411176"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2010-174","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2010-174","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2010","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/6949c546-449d-4db3-9901-469b624a15dd","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, Edinburgh, EH8 9AB, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, Edinburgh, EH8 9AB, United Kingdom","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110238677","display_name":"Oliver Watts","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Oliver Watts","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, Edinburgh, EH8 9AB, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, Edinburgh, EH8 9AB, United Kingdom","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062516688","display_name":"Simon King","orcid":"https://orcid.org/0000-0002-2694-2843"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Simon King","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, Edinburgh, EH8 9AB, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, Edinburgh, EH8 9AB, United Kingdom","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058936597","display_name":"Bela Usabaev","orcid":null},"institutions":[{"id":"https://openalex.org/I8087733","display_name":"University of T\u00fcbingen","ror":"https://ror.org/03a1kwz48","country_code":"DE","type":"education","lineage":["https://openalex.org/I8087733"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bela Usabaev","raw_affiliation_strings":["Universit\u00e4t T\u00fcbingen, Wilhelmstr. 7 72074 T\u00fcbingen, Germany"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t T\u00fcbingen, Wilhelmstr. 7 72074 T\u00fcbingen, Germany","institution_ids":["https://openalex.org/I8087733"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5007639385"],"corresponding_institution_ids":["https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":5.574,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.95820188,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"418","last_page":"421"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.8546964526176453},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7940880060195923},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.766789972782135},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6623947620391846},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.6232317686080933},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5970835089683533},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.48016849160194397},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4562796354293823},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2799469232559204},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.18036988377571106},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.16891193389892578}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.8546964526176453},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7940880060195923},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.766789972782135},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6623947620391846},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.6232317686080933},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5970835089683533},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.48016849160194397},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4562796354293823},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2799469232559204},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.18036988377571106},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.16891193389892578},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.21437/interspeech.2010-174","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2010-174","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2010","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.1019.9313","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1019.9313","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://www.era.lib.ed.ac.uk/bitstream/handle/1842/4865/p50227.pdf%3Bjsessionid%3D683867AA612476BA2A29ADEA08CB655E?sequence%3D1","raw_type":"text"},{"id":"pmh:oai:era.ed.ac.uk:1842/4560","is_oa":false,"landing_page_url":"http://hdl.handle.net/1842/4560","pdf_url":null,"source":{"id":"https://openalex.org/S7407055182","display_name":"ERA","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Paper"},{"id":"pmh:oai:era.ed.ac.uk:1842/4865","is_oa":false,"landing_page_url":"http://hdl.handle.net/1842/4865","pdf_url":null,"source":{"id":"https://openalex.org/S7407055182","display_name":"ERA","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Paper"},{"id":"pmh:oai:pure.ed.ac.uk:ec_fundedresources/6949c546-449d-4db3-9901-469b624a15dd","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:pure.ed.ac.uk:openaire/6949c546-449d-4db3-9901-469b624a15dd","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/6949c546-449d-4db3-9901-469b624a15dd","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Yamagishi, J, Watts, O, King, S & Usabaev, B 2010, Roles of the Average Voice in Speaker-adaptive HMM-based Speech Synthesis. in Proc. Interspeech 2010. < http://hdl.handle.net/1842/4560 >","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/6949c546-449d-4db3-9901-469b624a15dd","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/6949c546-449d-4db3-9901-469b624a15dd","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Yamagishi, J, Watts, O, King, S & Usabaev, B 2010, Roles of the Average Voice in Speaker-adaptive HMM-based Speech Synthesis. in Proc. Interspeech 2010. < http://hdl.handle.net/1842/4560 >","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[{"score":0.6499999761581421,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W163148820","https://openalex.org/W1514737389","https://openalex.org/W1556556182","https://openalex.org/W1580849279","https://openalex.org/W1970256373","https://openalex.org/W1975800736","https://openalex.org/W1984905644","https://openalex.org/W2000513720","https://openalex.org/W2000728161","https://openalex.org/W2002342963","https://openalex.org/W2049686551","https://openalex.org/W2117418893","https://openalex.org/W2129142580","https://openalex.org/W2153914468","https://openalex.org/W2165143604","https://openalex.org/W2168884969","https://openalex.org/W2404126548","https://openalex.org/W3011402798","https://openalex.org/W3022413497"],"related_works":["https://openalex.org/W2606551632","https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W3119288895","https://openalex.org/W2108985546","https://openalex.org/W2038801705","https://openalex.org/W2433276473","https://openalex.org/W2077992636","https://openalex.org/W1537411440","https://openalex.org/W2185075503"],"abstract_inverted_index":{"In":[0],"speaker-adaptive":[1],"HMM-based":[2],"speech":[3,12,94],"synthesis,":[4,95],"there":[5],"are":[6],"a":[7],"few":[8],"speakers":[9],"whose":[10],"synthetic":[11],"sounds":[13],"worse":[14],"than":[15],"that":[16,42,83],"of":[17,25],"other":[18],"speakers,":[19],"despite":[20],"having":[21],"the":[22,30,47,52,59,71],"same":[23,31],"amount":[24],"adapta-tion":[26],"data":[27],"from":[28,46],"within":[29],"corpus.":[32],"This":[33],"paper":[34],"investigates":[35],"these":[36],"fluctuations":[37],"in":[38],"quality":[39],"and":[40,73],"found":[41],"as":[43],"mel-cepstral":[44],"dis-tance":[45],"average":[48,77,97],"voice":[49,78],"becomes":[50],"larger,":[51],"MOS":[53],"scores":[54],"generally":[55],"become":[56],"worse.":[57],"Although":[58],"negative":[60],"correlation":[61,85],"ob-tained":[62],"is":[63,86],"not":[64],"strong":[65],"enough,":[66],"this":[67,84],"helps":[68],"us":[69],"improve":[70],"training":[72],"adaptation":[74,100],"strategies":[75],"for":[76],"models.":[79],"Further-more":[80],"we":[81],"remark":[82],"strongly":[87],"linked":[88],"to":[89],"\u201cvocal":[90],"attractiveness.\u201d":[91],"Index":[92],"Terms:":[93],"HMM,":[96],"voice,":[98],"speaker":[99]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":6}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
