{"id":"https://openalex.org/W4306246947","doi":"https://doi.org/10.1109/slt54892.2023.10022766","title":"Can We Use Common Voice to Train a Multi-Speaker TTS System?","display_name":"Can We Use Common Voice to Train a Multi-Speaker TTS System?","publication_year":2023,"publication_date":"2023-01-09","ids":{"openalex":"https://openalex.org/W4306246947","doi":"https://doi.org/10.1109/slt54892.2023.10022766"},"language":"en","primary_location":{"id":"doi:10.1109/slt54892.2023.10022766","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10022766","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-03812715/document","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087685199","display_name":"Sewade Ogun","orcid":null},"institutions":[{"id":"https://openalex.org/I90183372","display_name":"Universit\u00e9 de Lorraine","ror":"https://ror.org/04vfs2w97","country_code":"FR","type":"education","lineage":["https://openalex.org/I90183372"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Sewade Ogun","raw_affiliation_strings":["Universit&#x00E9; de Lorraine, CNRS,Nancy,France,F-54000"],"affiliations":[{"raw_affiliation_string":"Universit&#x00E9; de Lorraine, CNRS,Nancy,France,F-54000","institution_ids":["https://openalex.org/I90183372","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039712983","display_name":"Vincent Colotte","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I90183372","display_name":"Universit\u00e9 de Lorraine","ror":"https://ror.org/04vfs2w97","country_code":"FR","type":"education","lineage":["https://openalex.org/I90183372"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Vincent Colotte","raw_affiliation_strings":["Universit&#x00E9; de Lorraine, CNRS,Nancy,France,F-54000"],"affiliations":[{"raw_affiliation_string":"Universit&#x00E9; de Lorraine, CNRS,Nancy,France,F-54000","institution_ids":["https://openalex.org/I90183372","https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065635001","display_name":"Emmanuel Vincent","orcid":"https://orcid.org/0000-0002-0183-7289"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I90183372","display_name":"Universit\u00e9 de Lorraine","ror":"https://ror.org/04vfs2w97","country_code":"FR","type":"education","lineage":["https://openalex.org/I90183372"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Emmanuel Vincent","raw_affiliation_strings":["Universit&#x00E9; de Lorraine, CNRS,Nancy,France,F-54000"],"affiliations":[{"raw_affiliation_string":"Universit&#x00E9; de Lorraine, CNRS,Nancy,France,F-54000","institution_ids":["https://openalex.org/I90183372","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5087685199"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I90183372"],"apc_list":null,"apc_paid":null,"fwci":0.33,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.4371134,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"900","last_page":"905"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8360198736190796},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6556766629219055},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.6485239267349243},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.6175488829612732},{"id":"https://openalex.org/keywords/mean-opinion-score","display_name":"Mean opinion score","score":0.6030740141868591},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5556224584579468},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.5221919417381287},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4637603759765625},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4559434652328491},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4356667995452881},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4277232885360718},{"id":"https://openalex.org/keywords/lexical-diversity","display_name":"Lexical diversity","score":0.41005223989486694},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.37621191143989563},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.09248214960098267},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.059908390045166016}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8360198736190796},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6556766629219055},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.6485239267349243},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.6175488829612732},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.6030740141868591},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5556224584579468},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.5221919417381287},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4637603759765625},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4559434652328491},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4356667995452881},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4277232885360718},{"id":"https://openalex.org/C2781202465","wikidata":"https://www.wikidata.org/wiki/Q18346297","display_name":"Lexical diversity","level":3,"score":0.41005223989486694},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37621191143989563},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.09248214960098267},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.059908390045166016},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/slt54892.2023.10022766","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10022766","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-03812715v1","is_oa":true,"landing_page_url":"https://hal.science/hal-03812715","pdf_url":"https://hal.science/hal-03812715/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"The 2022 IEEE Spoken Language Technology Workshop (SLT 2022), Jan 2023, Doha, Qatar","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-03812715v1","is_oa":true,"landing_page_url":"https://hal.science/hal-03812715","pdf_url":"https://hal.science/hal-03812715/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"The 2022 IEEE Spoken Language Technology Workshop (SLT 2022), Jan 2023, Doha, Qatar","raw_type":"Conference papers"},"sustainable_development_goals":[{"score":0.8299999833106995,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320313934","display_name":"Institut national de recherche en informatique et en automatique (INRIA)","ror":"https://ror.org/02kvxyf05"},{"id":"https://openalex.org/F4320333359","display_name":"College of Natural Resources and Sciences, Humboldt State University","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4306246947.pdf","grobid_xml":"https://content.openalex.org/works/W4306246947.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1552314771","https://openalex.org/W2029802039","https://openalex.org/W2527515520","https://openalex.org/W2557915412","https://openalex.org/W2749074593","https://openalex.org/W2907262790","https://openalex.org/W2913512413","https://openalex.org/W2972359262","https://openalex.org/W2972394484","https://openalex.org/W2974231335","https://openalex.org/W2998572311","https://openalex.org/W3015645837","https://openalex.org/W3016021263","https://openalex.org/W3016181583","https://openalex.org/W3026874504","https://openalex.org/W3030437843","https://openalex.org/W3092028330","https://openalex.org/W3095717210","https://openalex.org/W3163906773","https://openalex.org/W3196584150","https://openalex.org/W3202278141","https://openalex.org/W3208049241","https://openalex.org/W4221150649","https://openalex.org/W4224929761","https://openalex.org/W4286909631","https://openalex.org/W6729924827","https://openalex.org/W6767671539","https://openalex.org/W6769050887","https://openalex.org/W6771467084","https://openalex.org/W6773553514","https://openalex.org/W6777694618","https://openalex.org/W6783867762","https://openalex.org/W6802119844","https://openalex.org/W6803017244","https://openalex.org/W6809863597"],"related_works":["https://openalex.org/W4287880334","https://openalex.org/W4366700029","https://openalex.org/W4285230481","https://openalex.org/W4385769873","https://openalex.org/W2015759683","https://openalex.org/W4281634296","https://openalex.org/W4319161863","https://openalex.org/W2371687270","https://openalex.org/W4307819175","https://openalex.org/W4311888330"],"abstract_inverted_index":{"Training":[0],"of":[1,36,53,86,107,148],"multi-speaker":[2,92],"text-to-speech":[3],"(TTS)":[4],"systems":[5],"relies":[6],"on":[7,11,95,118,131],"curated":[8],"datasets":[9,17,49],"based":[10],"high-quality":[12,70],"recordings":[13],"or":[14],"audiobooks.":[15],"Such":[16],"often":[18],"lack":[19],"speaker":[20],"diversity":[21],"and":[22,122],"are":[23],"expensive":[24],"to":[25,67,116,129,139],"collect.":[26],"As":[27],"an":[28],"alternative,":[29],"recent":[30],"studies":[31],"have":[32],"leveraged":[33],"the":[34,51,84,96,104,120,132,137],"availability":[35],"large,":[37],"crowdsourced":[38],"automatic":[39],"speech":[40],"recognition":[41],"(ASR)":[42],"datasets.":[43],"A":[44],"major":[45],"problem":[46],"with":[47,114,127],"such":[48],"is":[50],"presence":[52],"noisy":[54],"and/or":[55],"distorted":[56],"samples,":[57],"which":[58],"degrade":[59],"TTS":[60,141],"quality.":[61],"In":[62],"this":[63,87],"paper,":[64],"we":[65],"propose":[66],"automatically":[68],"select":[69],"training":[71,90,117,130],"samples":[72,121],"using":[73],"a":[74,91,145],"non-intrusive":[75],"mean":[76],"opinion":[77],"score":[78],"(MOS)":[79],"estimator,":[80],"WV-MOS.":[81],"We":[82],"show":[83],"viability":[85],"approach":[88,102],"for":[89,144],"GlowTTS":[93],"model":[94],"Common":[97],"Voice":[98],"English":[99],"dataset.":[100,134],"Our":[101],"improves":[103],"overall":[105],"quality":[106],"generated":[108],"utterances":[109],"by":[110,123],"1.26":[111],"MOS":[112,125],"point":[113,126],"respect":[115,128],"all":[119],"0.35":[124],"LibriTTS":[133],"This":[135],"opens":[136],"door":[138],"au-tomatic":[140],"dataset":[142],"curation":[143],"wider":[146],"range":[147],"languages.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
