{"id":"https://openalex.org/W4235154690","doi":"https://doi.org/10.21437/interspeech.2015-270","title":"A study of speaker adaptation for DNN-based speech synthesis","display_name":"A study of speaker adaptation for DNN-based speech synthesis","publication_year":2015,"publication_date":"2015-09-06","ids":{"openalex":"https://openalex.org/W4235154690","doi":"https://doi.org/10.21437/interspeech.2015-270"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2015-270","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2015-270","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2015","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.pure.ed.ac.uk/ws/files/19840710/Wu_adaptation_Interspeech2015.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102765381","display_name":"Zhizheng Wu","orcid":"https://orcid.org/0009-0001-1192-9857"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Zhizheng Wu","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, United Kingdom","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084169592","display_name":"Pawe\u0142 \u015awi\u0119toja\u0144ski","orcid":"https://orcid.org/0000-0001-5896-4505"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pawel Swietojanski","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, United Kingdom","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038279043","display_name":"Christophe Veaux","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Christophe Veaux","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, United Kingdom","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027442277","display_name":"Steve Renals","orcid":"https://orcid.org/0000-0002-8790-3389"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Steve Renals","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, United Kingdom","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062516688","display_name":"Simon King","orcid":"https://orcid.org/0000-0002-2694-2843"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Simon King","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, United Kingdom","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102765381"],"corresponding_institution_ids":["https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":10.6758,"has_fulltext":true,"cited_by_count":60,"citation_normalized_percentile":{"value":0.98278825,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"879","last_page":"883"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7502230405807495},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7225006222724915},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.7012511491775513},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.47873374819755554},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4624500572681427},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.11609634757041931}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7502230405807495},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7225006222724915},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.7012511491775513},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.47873374819755554},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4624500572681427},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.11609634757041931},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2015-270","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2015-270","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2015","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:publications/891b7eeb-3c7f-4d26-b8d6-bc6b6e0800de","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/891b7eeb-3c7f-4d26-b8d6-bc6b6e0800de","pdf_url":"https://www.pure.ed.ac.uk/ws/files/19840710/Wu_adaptation_Interspeech2015.pdf","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:publications/891b7eeb-3c7f-4d26-b8d6-bc6b6e0800de","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/891b7eeb-3c7f-4d26-b8d6-bc6b6e0800de","pdf_url":"https://www.pure.ed.ac.uk/ws/files/19840710/Wu_adaptation_Interspeech2015.pdf","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4099999964237213,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G3790594077","display_name":"Natural Speech Technology","funder_award_id":"EP/I031022/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4235154690.pdf","grobid_xml":"https://content.openalex.org/works/W4235154690.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W137106866","https://openalex.org/W1499332833","https://openalex.org/W1570874634","https://openalex.org/W1613141907","https://openalex.org/W1961429348","https://openalex.org/W1989549063","https://openalex.org/W1990505856","https://openalex.org/W2015633636","https://openalex.org/W2020024436","https://openalex.org/W2045158511","https://openalex.org/W2049686551","https://openalex.org/W2079623482","https://openalex.org/W2094147890","https://openalex.org/W2094721231","https://openalex.org/W2100969003","https://openalex.org/W2102003408","https://openalex.org/W2120605154","https://openalex.org/W2129142580","https://openalex.org/W2134973740","https://openalex.org/W2146871184","https://openalex.org/W2150769028","https://openalex.org/W2153914468","https://openalex.org/W2160815625","https://openalex.org/W2294797155","https://openalex.org/W2395750323","https://openalex.org/W2399762392","https://openalex.org/W2403307129","https://openalex.org/W2403731734"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W4313854686","https://openalex.org/W321304764","https://openalex.org/W2249138175","https://openalex.org/W2611678594","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W289407349","https://openalex.org/W2029134149"],"abstract_inverted_index":{"A":[0],"major":[1],"advantage":[2],"of":[3,43,59,113,120,127,152],"statistical":[4],"parametric":[5],"speech":[6,11,64],"synthesis":[7,12,65],"(SPSS)":[8],"over":[9],"unit-selection":[10],"is":[13],"its":[14],"adaptability":[15,42,126],"and":[16,22,94,118,130,154],"controllability":[17],"in":[18,45,150],"changing":[19],"speaker":[20,60,83,155],"characteristics":[21],"speaking":[23],"style.Recently,":[24],"several":[25],"studies":[26],"using":[27],"deep":[28],"neural":[29],"networks":[30],"(DNNs)":[31],"as":[32,79],"acoustic":[33,107],"models":[34],"for":[35,62],"SPSS":[36,46],"have":[37],"shown":[38],"promising":[39],"results.However,":[40],"the":[41,90,101,111,125,128,135,144],"DNNs":[44],"has":[47],"not":[48],"been":[49],"systematically":[50,109],"studied.In":[51],"this":[52],"paper,":[53],"we":[54,70],"conduct":[55],"an":[56],"experimental":[57],"analysis":[58],"adaptation":[61,87,116,141],"DNN-based":[63],"at":[66,100],"different":[67],"levels.In":[68],"particular,":[69],"augment":[71],"a":[72,96],"low-dimensional":[73],"speaker-specific":[74],"vector":[75],"with":[76],"linguistic":[77],"features":[78],"input":[80],"to":[81,88,104],"represent":[82],"identity,":[84],"perform":[85,95],"model":[86,147],"scale":[89],"hidden":[91,145],"activation":[92],"weights,":[93],"feature":[97],"space":[98],"transformation":[99],"output":[102],"layer":[103],"modify":[105],"generated":[106],"features.We":[108],"analyse":[110],"performance":[112,142],"each":[114],"individual":[115],"technique":[117],"that":[119,134],"their":[121],"combinations.Experimental":[122],"results":[123],"confirm":[124],"DNN,":[129],"listening":[131],"tests":[132],"demonstrate":[133],"DNN":[136],"can":[137],"achieve":[138],"significantly":[139],"better":[140],"than":[143],"Markov":[146],"(HMM)":[148],"baseline":[149],"terms":[151],"naturalness":[153],"similarity.":[156]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":10},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":7},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
