{"id":"https://openalex.org/W4293053484","doi":"https://doi.org/10.1109/civemsa53371.2022.9853712","title":"Towards a Vowel Formant Based Quality Metric for Text-to-Speech Systems: Measuring Monophthong Naturalness","display_name":"Towards a Vowel Formant Based Quality Metric for Text-to-Speech Systems: Measuring Monophthong Naturalness","publication_year":2022,"publication_date":"2022-06-15","ids":{"openalex":"https://openalex.org/W4293053484","doi":"https://doi.org/10.1109/civemsa53371.2022.9853712"},"language":"en","primary_location":{"id":"doi:10.1109/civemsa53371.2022.9853712","is_oa":false,"landing_page_url":"https://doi.org/10.1109/civemsa53371.2022.9853712","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 9th International Conference on Computational Intelligence and Virtual Environments for Measurement Systems and Applications (CIVEMSA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078471729","display_name":"Sven Albrecht","orcid":"https://orcid.org/0000-0002-3678-8878"},"institutions":[{"id":"https://openalex.org/I2610724","display_name":"Chemnitz University of Technology","ror":"https://ror.org/00a208s56","country_code":"DE","type":"education","lineage":["https://openalex.org/I2610724"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Sven Albrecht","raw_affiliation_strings":["Chemnitz University of Technology,Professorship for English Language and Linguistics","Professorship for English Language and Linguistics, Chemnitz University of Technology"],"affiliations":[{"raw_affiliation_string":"Chemnitz University of Technology,Professorship for English Language and Linguistics","institution_ids":["https://openalex.org/I2610724"]},{"raw_affiliation_string":"Professorship for English Language and Linguistics, Chemnitz University of Technology","institution_ids":["https://openalex.org/I2610724"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013136015","display_name":"Rewa Tamboli","orcid":"https://orcid.org/0000-0002-5659-6582"},"institutions":[{"id":"https://openalex.org/I2610724","display_name":"Chemnitz University of Technology","ror":"https://ror.org/00a208s56","country_code":"DE","type":"education","lineage":["https://openalex.org/I2610724"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Rewa Tamboli","raw_affiliation_strings":["Chemnitz University of Technology,Professorship for Psychology of Learning With Digital Media","Professorship for Psychology of Learning With Digital Media, Chemnitz University of Technology"],"affiliations":[{"raw_affiliation_string":"Chemnitz University of Technology,Professorship for Psychology of Learning With Digital Media","institution_ids":["https://openalex.org/I2610724"]},{"raw_affiliation_string":"Professorship for Psychology of Learning With Digital Media, Chemnitz University of Technology","institution_ids":["https://openalex.org/I2610724"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024987550","display_name":"Stefan Taubert","orcid":"https://orcid.org/0000-0002-4932-2874"},"institutions":[{"id":"https://openalex.org/I2610724","display_name":"Chemnitz University of Technology","ror":"https://ror.org/00a208s56","country_code":"DE","type":"education","lineage":["https://openalex.org/I2610724"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stefan Taubert","raw_affiliation_strings":["University of Technology,Professorship for Media Informatics Chemnitz","Professorship for Media Informatics Chemnitz, University of Technology"],"affiliations":[{"raw_affiliation_string":"University of Technology,Professorship for Media Informatics Chemnitz","institution_ids":["https://openalex.org/I2610724"]},{"raw_affiliation_string":"Professorship for Media Informatics Chemnitz, University of Technology","institution_ids":["https://openalex.org/I2610724"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071056491","display_name":"Maximilian Eibl","orcid":"https://orcid.org/0000-0002-9519-2708"},"institutions":[{"id":"https://openalex.org/I2610724","display_name":"Chemnitz University of Technology","ror":"https://ror.org/00a208s56","country_code":"DE","type":"education","lineage":["https://openalex.org/I2610724"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Maximilian Eibl","raw_affiliation_strings":["University of Technology,Professorship for Media Informatics Chemnitz","Professorship for Media Informatics Chemnitz, University of Technology"],"affiliations":[{"raw_affiliation_string":"University of Technology,Professorship for Media Informatics Chemnitz","institution_ids":["https://openalex.org/I2610724"]},{"raw_affiliation_string":"Professorship for Media Informatics Chemnitz, University of Technology","institution_ids":["https://openalex.org/I2610724"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001896504","display_name":"G\u00fcnter Daniel Rey","orcid":"https://orcid.org/0000-0001-9717-8478"},"institutions":[{"id":"https://openalex.org/I2610724","display_name":"Chemnitz University of Technology","ror":"https://ror.org/00a208s56","country_code":"DE","type":"education","lineage":["https://openalex.org/I2610724"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gunter Daniel Rey","raw_affiliation_strings":["Chemnitz University of Technology,Professorship for Psychology of Learning With Digital Media","Professorship for Psychology of Learning With Digital Media, Chemnitz University of Technology"],"affiliations":[{"raw_affiliation_string":"Chemnitz University of Technology,Professorship for Psychology of Learning With Digital Media","institution_ids":["https://openalex.org/I2610724"]},{"raw_affiliation_string":"Professorship for Psychology of Learning With Digital Media, Chemnitz University of Technology","institution_ids":["https://openalex.org/I2610724"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080157229","display_name":"Josef Schmied","orcid":"https://orcid.org/0000-0001-8499-3158"},"institutions":[{"id":"https://openalex.org/I2610724","display_name":"Chemnitz University of Technology","ror":"https://ror.org/00a208s56","country_code":"DE","type":"education","lineage":["https://openalex.org/I2610724"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Josef Schmied","raw_affiliation_strings":["Chemnitz University of Technology,Professorship for English Language and Linguistics","Professorship for English Language and Linguistics, Chemnitz University of Technology"],"affiliations":[{"raw_affiliation_string":"Chemnitz University of Technology,Professorship for English Language and Linguistics","institution_ids":["https://openalex.org/I2610724"]},{"raw_affiliation_string":"Professorship for English Language and Linguistics, Chemnitz University of Technology","institution_ids":["https://openalex.org/I2610724"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5078471729"],"corresponding_institution_ids":["https://openalex.org/I2610724"],"apc_list":null,"apc_paid":null,"fwci":0.1487,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.39238159,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/formant","display_name":"Formant","score":0.9065410494804382},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.8879689574241638},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6948230266571045},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6829748749732971},{"id":"https://openalex.org/keywords/vowel","display_name":"Vowel","score":0.6435268521308899},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6361156105995178},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.49636560678482056},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.45147624611854553},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.4394809305667877},{"id":"https://openalex.org/keywords/mean-opinion-score","display_name":"Mean opinion score","score":0.43777161836624146},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.42695847153663635},{"id":"https://openalex.org/keywords/psychoacoustics","display_name":"Psychoacoustics","score":0.41118407249450684},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32844361662864685},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.22033047676086426}],"concepts":[{"id":"https://openalex.org/C158215666","wikidata":"https://www.wikidata.org/wiki/Q1414685","display_name":"Formant","level":3,"score":0.9065410494804382},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.8879689574241638},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6948230266571045},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6829748749732971},{"id":"https://openalex.org/C2779581591","wikidata":"https://www.wikidata.org/wiki/Q36244","display_name":"Vowel","level":2,"score":0.6435268521308899},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6361156105995178},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.49636560678482056},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.45147624611854553},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.4394809305667877},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.43777161836624146},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.42695847153663635},{"id":"https://openalex.org/C9940772","wikidata":"https://www.wikidata.org/wiki/Q557399","display_name":"Psychoacoustics","level":3,"score":0.41118407249450684},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32844361662864685},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.22033047676086426},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/civemsa53371.2022.9853712","is_oa":false,"landing_page_url":"https://doi.org/10.1109/civemsa53371.2022.9853712","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 9th International Conference on Computational Intelligence and Virtual Environments for Measurement Systems and Applications (CIVEMSA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2892072280","https://openalex.org/W2130095015","https://openalex.org/W4399013759","https://openalex.org/W3034731095","https://openalex.org/W2996518094","https://openalex.org/W2364915322","https://openalex.org/W1485474031","https://openalex.org/W2185350064","https://openalex.org/W2789044730","https://openalex.org/W4226256361"],"abstract_inverted_index":{"This":[0,21],"contribution":[1],"proposes":[2],"an":[3],"objective,":[4],"vowel":[5,38,148,158,205],"formant":[6,138],"based":[7,90],"quality":[8,53,67,120],"metric":[9,54,61,121],"for":[10,26,46,55,77,130,136,174],"assessing":[11],"the":[12,24,66,97,103,113,126,147,157,161,165,175,178,182,201,204],"naturalness":[13],"of":[14,68,116,160,181,203,207],"monophthongs":[15],"synthesized":[16,166],"by":[17],"Text-to-Speech":[18],"(TTS)":[19],"systems.":[20,57,82],"could":[22,72],"eliminate":[23],"need":[25],"time":[27],"and":[28,93,111,133,140,150,164,177],"resource-intensive":[29],"perception-based":[30],"mean":[31],"opinion":[32],"score":[33],"metrics.":[34],"We":[35,83],"show":[36,155,194],"that":[37,156,195],"space":[39],"plots":[40],"can":[41],"serve":[42],"as":[43,106],"a":[44,48,85,141,170],"basis":[45],"developing":[47],"more":[49,185],"comprehensive,":[50],"linguistically":[51],"sound":[52],"TTS":[56,81,88],"In":[58],"addition,":[59],"our":[60,108],"provides":[62],"detailed":[63],"insights":[64],"on":[65,91,96],"individual":[69],"monophthongs,":[70],"which":[71],"help":[73],"with":[74,184],"identifying":[75],"areas":[76],"further":[78],"optimization":[79],"in":[80,107,200],"use":[84],"state-of-the-art":[86],"neural":[87],"pipeline":[89],"Tacotron":[92],"WaveGlow,":[94],"trained":[95],"LJ":[98],"Speech":[99],"dataset,":[100],"to":[101,145,169],"generate":[102],"same":[104],"audio":[105,117,163,167],"validation":[109],"set":[110],"compare":[112],"two":[114],"sets":[115],"files.":[118],"Our":[119,153],"is":[122,197],"automatically":[123],"calculated":[124],"using":[125],"Montreal":[127],"Forced":[128],"Aligner":[129],"aligning":[131],"text":[132],"audio,":[134],"Praat":[135],"measuring":[137],"values":[139],"custom":[142],"R":[143],"script":[144],"calculate":[146],"spaces":[149,159,206],"their":[151],"overlap.":[152],"results":[154],"original":[162],"overlap":[168,202],"large":[171],"extent,":[172],"especially":[173],"means":[176],"central":[179],"75%":[180],"data,":[183],"variation":[186,199],"when":[187],"considering":[188],"all":[189],"data":[190],"points.":[191],"Furthermore,":[192],"we":[193],"there":[196],"considerable":[198],"different":[208],"monophthongs.":[209]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
