{"id":"https://openalex.org/W2803229097","doi":"https://doi.org/10.1109/taslp.2018.2835720","title":"A Comparison Between STRAIGHT, Glottal, and Sinusoidal Vocoding in Statistical Parametric Speech Synthesis","display_name":"A Comparison Between STRAIGHT, Glottal, and Sinusoidal Vocoding in Statistical Parametric Speech Synthesis","publication_year":2018,"publication_date":"2018-05-18","ids":{"openalex":"https://openalex.org/W2803229097","doi":"https://doi.org/10.1109/taslp.2018.2835720","mag":"2803229097"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2018.2835720","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2018.2835720","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/377226c9-1701-4f98-881d-fce0e407de0d","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013722514","display_name":"Manu Airaksinen","orcid":"https://orcid.org/0000-0002-8031-2260"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Manu Airaksinen","raw_affiliation_strings":["Department of Signal Processing and Acoustics, Aalto University, Espoo, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Signal Processing and Acoustics, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081575688","display_name":"Lauri Juvela","orcid":"https://orcid.org/0000-0002-2201-103X"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Lauri Juvela","raw_affiliation_strings":["Department of Signal Processing and Acoustics, Aalto University, Espoo, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Signal Processing and Acoustics, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064828339","display_name":"Bajibabu Bollepalli","orcid":"https://orcid.org/0000-0003-1268-0579"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Bajibabu Bollepalli","raw_affiliation_strings":["Department of Signal Processing and Acoustics, Aalto University, Espoo, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Signal Processing and Acoustics, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042186400","display_name":"Paavo Alku","orcid":"https://orcid.org/0000-0002-8173-9418"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Paavo Alku","raw_affiliation_strings":["Department of Signal Processing and Acoustics, Aalto University, Espoo, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Signal Processing and Acoustics, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5013722514"],"corresponding_institution_ids":["https://openalex.org/I9927081"],"apc_list":null,"apc_paid":null,"fwci":4.5696,"has_fulltext":true,"cited_by_count":45,"citation_normalized_percentile":{"value":0.95679406,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"26","issue":"9","first_page":"1658","last_page":"1670"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.770233154296875},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.7626255750656128},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.7282823920249939},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7143404483795166},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6744289398193359},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5293111205101013},{"id":"https://openalex.org/keywords/spectral-envelope","display_name":"Spectral envelope","score":0.5253279209136963},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.46077272295951843},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15791216492652893},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.0873071551322937},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07072710990905762},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.05932387709617615}],"concepts":[{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.770233154296875},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.7626255750656128},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.7282823920249939},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7143404483795166},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6744289398193359},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5293111205101013},{"id":"https://openalex.org/C54926389","wikidata":"https://www.wikidata.org/wiki/Q7575188","display_name":"Spectral envelope","level":2,"score":0.5253279209136963},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.46077272295951843},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15791216492652893},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0873071551322937},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07072710990905762},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.05932387709617615},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/taslp.2018.2835720","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2018.2835720","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/377226c9-1701-4f98-881d-fce0e407de0d","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/377226c9-1701-4f98-881d-fce0e407de0d","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Airaksinen, M, Juvela, L, Bollepalli, B, Yamagishi, J & Alku, P 2018, 'A Comparison Between STRAIGHT, Glottal, an Sinusoidal Vocoding in Statistical Parametric Speech Synthesis', IEEE/ACM Transactions on Audio, Speech and Language Processing, vol. 26, no. 9, pp. 1658-1670. https://doi.org/10.1109/TASLP.2018.2835720","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:aaltodoc.aalto.fi:123456789/34464","is_oa":true,"landing_page_url":"https://research.aalto.fi/en/publications/64fac994-c10e-42b2-b380-055654b97c05","pdf_url":"https://research.aalto.fi/files/21729808/ELEC_airaksinen_et_al_Comparison_between_IEEETranOnASLP.pdf","source":{"id":"https://openalex.org/S4306401662","display_name":"Aaltodoc (Aalto University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9927081","host_organization_name":"Aalto University","host_organization_lineage":["https://openalex.org/I9927081"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"acceptedVersion"},{"id":"pmh:oai:pure.ed.ac.uk:publications/377226c9-1701-4f98-881d-fce0e407de0d","is_oa":true,"landing_page_url":"http://hdl.handle.net/20.500.11820/377226c9-1701-4f98-881d-fce0e407de0d","pdf_url":"https://www.research.ed.ac.uk/en/publications/377226c9-1701-4f98-881d-fce0e407de0d","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/377226c9-1701-4f98-881d-fce0e407de0d","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/377226c9-1701-4f98-881d-fce0e407de0d","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Airaksinen, M, Juvela, L, Bollepalli, B, Yamagishi, J & Alku, P 2018, 'A Comparison Between STRAIGHT, Glottal, an Sinusoidal Vocoding in Statistical Parametric Speech Synthesis', IEEE/ACM Transactions on Audio, Speech and Language Processing, vol. 26, no. 9, pp. 1658-1670. https://doi.org/10.1109/TASLP.2018.2835720","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5699999928474426}],"awards":[{"id":"https://openalex.org/G2193098572","display_name":null,"funder_award_id":"312490","funder_id":"https://openalex.org/F4320321108","funder_display_name":"Academy of Finland"},{"id":"https://openalex.org/G7647910877","display_name":null,"funder_award_id":"284671","funder_id":"https://openalex.org/F4320321108","funder_display_name":"Academy of Finland"}],"funders":[{"id":"https://openalex.org/F4320321108","display_name":"Academy of Finland","ror":"https://ror.org/05k73zm37"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":67,"referenced_works":["https://openalex.org/W144656967","https://openalex.org/W197380572","https://openalex.org/W1496598473","https://openalex.org/W1563460361","https://openalex.org/W1601809792","https://openalex.org/W1963637322","https://openalex.org/W1970278793","https://openalex.org/W1975079546","https://openalex.org/W1990496939","https://openalex.org/W2009674825","https://openalex.org/W2021320575","https://openalex.org/W2029203798","https://openalex.org/W2043527182","https://openalex.org/W2049686551","https://openalex.org/W2066999516","https://openalex.org/W2074854222","https://openalex.org/W2075510576","https://openalex.org/W2081096848","https://openalex.org/W2091743772","https://openalex.org/W2100649345","https://openalex.org/W2109189270","https://openalex.org/W2109720703","https://openalex.org/W2110420312","https://openalex.org/W2111284386","https://openalex.org/W2129142580","https://openalex.org/W2135934764","https://openalex.org/W2144139079","https://openalex.org/W2145247325","https://openalex.org/W2154920538","https://openalex.org/W2159437158","https://openalex.org/W2294797155","https://openalex.org/W2295121007","https://openalex.org/W2395578248","https://openalex.org/W2398742733","https://openalex.org/W2399240891","https://openalex.org/W2405614646","https://openalex.org/W2406189128","https://openalex.org/W2417163358","https://openalex.org/W2471520273","https://openalex.org/W2519091744","https://openalex.org/W2524251915","https://openalex.org/W2586183200","https://openalex.org/W2598638573","https://openalex.org/W2604184139","https://openalex.org/W2610284765","https://openalex.org/W2616492649","https://openalex.org/W2666408839","https://openalex.org/W2745644908","https://openalex.org/W2748379347","https://openalex.org/W2749382813","https://openalex.org/W2750380119","https://openalex.org/W2964060510","https://openalex.org/W3094114204","https://openalex.org/W4235716345","https://openalex.org/W4251158933","https://openalex.org/W6605817804","https://openalex.org/W6607978602","https://openalex.org/W6636056872","https://openalex.org/W6677973343","https://openalex.org/W6680987829","https://openalex.org/W6696843773","https://openalex.org/W6697614067","https://openalex.org/W6711777497","https://openalex.org/W6712569816","https://openalex.org/W6712631415","https://openalex.org/W6713389654","https://openalex.org/W6736356763"],"related_works":["https://openalex.org/W1974895211","https://openalex.org/W2129841057","https://openalex.org/W3040712279","https://openalex.org/W2176409448","https://openalex.org/W2364769705","https://openalex.org/W2056136368","https://openalex.org/W2374664672","https://openalex.org/W2039489009","https://openalex.org/W2619026611","https://openalex.org/W2616987818"],"abstract_inverted_index":{"A":[0],"vocoder":[1,46,151,170,186,229],"is":[2,95,230],"used":[3],"to":[4,114,216],"express":[5],"a":[6,10,20,62,83,112,137,173,228],"speech":[7,21,66,124],"waveform":[8,89,224],"with":[9,39,77,82,167,176],"controllable":[11],"parametric":[12,65],"representation":[13],"that":[14,130,183,222],"can":[15,155],"be":[16],"converted":[17],"back":[18],"into":[19,75],"waveform.":[22],"Vocoders":[23],"representing":[24],"their":[25],"main":[26],"categories":[27],"(mixed":[28],"excitation,":[29],"glottal,":[30],"and":[31,41,80,148],"sinusoidal":[32,185],"vocoders)":[33],"were":[34,73],"compared":[35],"in":[36,61,190],"this":[37],"study":[38],"formal":[40],"crowd-sourced":[42],"listening":[43],"tests.":[44,200],"The":[45,126,160],"quality":[47,100,143,233],"was":[48,165],"measured":[49],"within":[50],"the":[51,70,88,93,99,105,116,122,131,134,141,145,149,168,180,184,193,198,205,209,211,217,223],"context":[52],"of":[53,92,104,118,133,144,208,227],"analysis-synthesis":[54],"as":[55,57,111],"well":[56],"text-to-speech":[58],"(TTS)":[59],"synthesis":[60,67,76,81],"modern":[63],"statistical":[64],"framework.":[68],"Furthermore,":[69],"TTS":[71,163],"experiments":[72],"divided":[74],"vocoder-specific":[78],"features":[79],"shared":[84],"envelope":[85],"model,":[86],"where":[87],"generation":[90,225],"method":[91,226],"vocoders":[94],"mainly":[96],"responsible":[97],"for":[98,152,204,232],"differences.":[101],"Finally,":[102,201],"all":[103],"tests":[106],"included":[107],"four":[108],"distinct":[109],"voices":[110],"way":[113],"investigate":[115],"effect":[117],"different":[119],"speakers":[120],"on":[121,140],"synthesized":[123],"quality.":[125],"obtained":[127,166],"results":[128,181],"suggest":[129],"choice":[132],"voice":[135,154,175],"has":[136,192],"profound":[138],"impact":[139],"overall":[142,195],"vocoder-generated":[146],"speech,":[147],"best":[150,194],"each":[153],"vary":[156],"case":[157],"by":[158],"case.":[159],"single":[161],"best-rated":[162],"system":[164],"glottal":[169],"GlottDNN":[171],"using":[172],"male":[174],"low":[177],"expressiveness.":[178],"However,":[179],"indicate":[182],"PML":[187],"(pulse":[188],"model":[189],"log-domain)":[191],"performance":[196],"across":[197],"performed":[199],"when":[202],"controlling":[203],"spectral":[206],"models":[207],"vocoders,":[210],"observed":[212],"differences":[213],"are":[214],"similar":[215],"baseline":[218],"results.":[219],"This":[220],"indicates":[221],"essential":[231],"improvements.":[234]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":3}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
