{"id":"https://openalex.org/W4415052592","doi":"https://doi.org/10.3390/info16100879","title":"Speech Recognition and Synthesis Models and Platforms for the Kazakh Language","display_name":"Speech Recognition and Synthesis Models and Platforms for the Kazakh Language","publication_year":2025,"publication_date":"2025-10-10","ids":{"openalex":"https://openalex.org/W4415052592","doi":"https://doi.org/10.3390/info16100879"},"language":"en","primary_location":{"id":"doi:10.3390/info16100879","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16100879","pdf_url":null,"source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.3390/info16100879","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021414240","display_name":"Aidana Karibayeva","orcid":"https://orcid.org/0000-0002-2023-1573"},"institutions":[{"id":"https://openalex.org/I185571130","display_name":"Al-Farabi Kazakh National University","ror":"https://ror.org/03q0vrn42","country_code":"KZ","type":"education","lineage":["https://openalex.org/I185571130"]}],"countries":["KZ"],"is_corresponding":true,"raw_author_name":"Aidana Karibayeva","raw_affiliation_strings":["Information Systems Department, Faculty of Information Technology and Artificial Intelligence, Farabi University, Almaty 050040, Kazakhstan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Systems Department, Faculty of Information Technology and Artificial Intelligence, Farabi University, Almaty 050040, Kazakhstan","institution_ids":["https://openalex.org/I185571130"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004586958","display_name":"Vladislav Karyukin","orcid":"https://orcid.org/0000-0002-8768-0349"},"institutions":[{"id":"https://openalex.org/I185571130","display_name":"Al-Farabi Kazakh National University","ror":"https://ror.org/03q0vrn42","country_code":"KZ","type":"education","lineage":["https://openalex.org/I185571130"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Vladislav Karyukin","raw_affiliation_strings":["Information Systems Department, Faculty of Information Technology and Artificial Intelligence, Farabi University, Almaty 050040, Kazakhstan"],"raw_orcid":"https://orcid.org/0000-0002-8768-0349","affiliations":[{"raw_affiliation_string":"Information Systems Department, Faculty of Information Technology and Artificial Intelligence, Farabi University, Almaty 050040, Kazakhstan","institution_ids":["https://openalex.org/I185571130"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079604919","display_name":"Balzhan Abduali","orcid":"https://orcid.org/0000-0003-0140-4181"},"institutions":[{"id":"https://openalex.org/I185571130","display_name":"Al-Farabi Kazakh National University","ror":"https://ror.org/03q0vrn42","country_code":"KZ","type":"education","lineage":["https://openalex.org/I185571130"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Balzhan Abduali","raw_affiliation_strings":["Information Systems Department, Faculty of Information Technology and Artificial Intelligence, Farabi University, Almaty 050040, Kazakhstan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Systems Department, Faculty of Information Technology and Artificial Intelligence, Farabi University, Almaty 050040, Kazakhstan","institution_ids":["https://openalex.org/I185571130"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000341686","display_name":"Dina Amirova","orcid":"https://orcid.org/0000-0002-0728-905X"},"institutions":[{"id":"https://openalex.org/I185571130","display_name":"Al-Farabi Kazakh National University","ror":"https://ror.org/03q0vrn42","country_code":"KZ","type":"education","lineage":["https://openalex.org/I185571130"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Dina Amirova","raw_affiliation_strings":["Information Systems Department, Faculty of Information Technology and Artificial Intelligence, Farabi University, Almaty 050040, Kazakhstan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Systems Department, Faculty of Information Technology and Artificial Intelligence, Farabi University, Almaty 050040, Kazakhstan","institution_ids":["https://openalex.org/I185571130"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5021414240"],"corresponding_institution_ids":["https://openalex.org/I185571130"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":2.0776,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.90372003,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"16","issue":"10","first_page":"879","last_page":"879"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.6452999711036682},{"id":"https://openalex.org/keywords/pesq","display_name":"PESQ","score":0.5637000203132629},{"id":"https://openalex.org/keywords/agglutinative-language","display_name":"Agglutinative language","score":0.5296000242233276},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5091000199317932},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.49320000410079956},{"id":"https://openalex.org/keywords/vowel","display_name":"Vowel","score":0.44839999079704285},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.39430001378059387},{"id":"https://openalex.org/keywords/phonetics","display_name":"Phonetics","score":0.35120001435279846}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8119000196456909},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7315000295639038},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.6452999711036682},{"id":"https://openalex.org/C103734657","wikidata":"https://www.wikidata.org/wiki/Q2739975","display_name":"PESQ","level":4,"score":0.5637000203132629},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5371999740600586},{"id":"https://openalex.org/C80875076","wikidata":"https://www.wikidata.org/wiki/Q171263","display_name":"Agglutinative language","level":3,"score":0.5296000242233276},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5091000199317932},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4970000088214874},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.49320000410079956},{"id":"https://openalex.org/C2779581591","wikidata":"https://www.wikidata.org/wiki/Q36244","display_name":"Vowel","level":2,"score":0.44839999079704285},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.39430001378059387},{"id":"https://openalex.org/C137584468","wikidata":"https://www.wikidata.org/wiki/Q35395","display_name":"Phonetics","level":2,"score":0.35120001435279846},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.32760000228881836},{"id":"https://openalex.org/C76978605","wikidata":"https://www.wikidata.org/wiki/Q102532","display_name":"Diphthong","level":3,"score":0.323199987411499},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.2797999978065491},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.27639999985694885},{"id":"https://openalex.org/C99209842","wikidata":"https://www.wikidata.org/wiki/Q643696","display_name":"Speech perception","level":3,"score":0.27320000529289246},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.26010000705718994},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.2597000002861023},{"id":"https://openalex.org/C2781297163","wikidata":"https://www.wikidata.org/wiki/Q9252","display_name":"Kazakh","level":2,"score":0.25769999623298645}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/info16100879","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16100879","pdf_url":null,"source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:3cdccd36743240148e44257439c0e66d","is_oa":true,"landing_page_url":"https://doaj.org/article/3cdccd36743240148e44257439c0e66d","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 16, Iss 10, p 879 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/info16100879","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16100879","pdf_url":null,"source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1524956127","https://openalex.org/W2101105183","https://openalex.org/W2141998673","https://openalex.org/W2209129844","https://openalex.org/W2250342921","https://openalex.org/W2962780374","https://openalex.org/W2964243274","https://openalex.org/W2979826702","https://openalex.org/W3008480565","https://openalex.org/W3035390927","https://openalex.org/W3085387930","https://openalex.org/W3097777922","https://openalex.org/W3105214104","https://openalex.org/W3108384977","https://openalex.org/W3111398107","https://openalex.org/W3155453618","https://openalex.org/W3161480375","https://openalex.org/W3202648695","https://openalex.org/W3203325857","https://openalex.org/W4280556124","https://openalex.org/W4296068757","https://openalex.org/W4297841565","https://openalex.org/W4313415426","https://openalex.org/W4317425824","https://openalex.org/W4318484710","https://openalex.org/W4366283726","https://openalex.org/W4384928248","https://openalex.org/W4385483851","https://openalex.org/W4385823254","https://openalex.org/W4391037608","https://openalex.org/W4392981865","https://openalex.org/W4399265137","https://openalex.org/W4399704219","https://openalex.org/W4403839424","https://openalex.org/W4404900640","https://openalex.org/W4405786206","https://openalex.org/W4406107190","https://openalex.org/W4413372773"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,23,35,73,86,150,203,214],"rapid":[2],"development":[3],"of":[4,22,26,34,60],"artificial":[5],"intelligence":[6],"and":[7,15,49,64,70,82,92,97,111,123,134,140,173,230],"machine":[8],"learning":[9],"technologies,":[10],"automatic":[11],"speech":[12,51,62,126],"recognition":[13,63,114],"(ASR)":[14],"text-to-speech":[16,199],"(TTS)":[17],"have":[18],"become":[19],"key":[20],"components":[21],"digital":[24],"transformation":[25],"society.":[27],"The":[28,143],"Kazakh":[29,74],"language,":[30],"as":[31],"a":[32,40,57],"representative":[33],"Turkic":[36],"language":[37,42,47],"family,":[38],"remains":[39],"low-resource":[41],"with":[43,130],"limited":[44],"audio":[45],"corpora,":[46],"models,":[48,66],"high-quality":[50],"synthesis":[52,65,127],"systems.":[53],"This":[54],"study":[55],"provides":[56],"comprehensive":[58],"analysis":[59],"existing":[61],"emphasizing":[67],"their":[68],"applicability":[69],"adaptation":[71,195],"to":[72,80,182],"language.":[75],"Special":[76],"attention":[77],"is":[78],"given":[79],"linguistic":[81],"technical":[83],"barriers,":[84],"including":[85,102],"agglutinative":[87],"structure,":[88],"rich":[89],"vowel":[90],"system,":[91],"phonemic":[93],"variability.":[94],"Both":[95],"open-source":[96],"commercial":[98],"solutions":[99],"were":[100,116],"evaluated,":[101],"Whisper,":[103],"GPT-4":[104,174],"Transcribe,":[105],"ElevenLabs,":[106],"OpenAI":[107,211],"TTS,":[108],"Voiser,":[109],"KazakhTTS2,":[110],"TurkicTTS.":[112],"Speech":[113],"systems":[115],"assessed":[117],"using":[118],"BLEU,":[119],"WER,":[120],"TER,":[121],"chrF,":[122],"COMET,":[124],"while":[125,164,210],"was":[128,153],"evaluated":[129],"MCD,":[131],"PESQ,":[132],"STOI,":[133],"DNSMOS,":[135],"thus":[136],"covering":[137],"both":[138],"lexical\u2013semantic":[139],"acoustic\u2013perceptual":[141],"characteristics.":[142],"results":[144],"demonstrate":[145],"that,":[146],"for":[147,196],"speech-to-text":[148],"(STT),":[149],"strongest":[151],"performance":[152],"achieved":[154,176,213],"by":[155],"Soyle":[156],"on":[157],"domain-specific":[158],"data":[159],"(BLEU":[160],"74.93,":[161],"WER":[162],"18.61),":[163],"Voiser":[165],"showed":[166],"balanced":[167],"accuracy":[168,217],"(WER":[169,189],"40.65\u201337.11,":[170],"chrF":[171],"80.88\u201384.51)":[172],"Transcribe":[175],"robust":[177],"semantic":[178],"preservation":[179],"(COMET":[180],"up":[181],"1.02).":[183],"In":[184],"contrast,":[185],"Whisper":[186],"performed":[187],"weakest":[188],"77.10,":[190],"BLEU":[191],"13.22),":[192],"requiring":[193],"further":[194],"Kazakh.":[197],"For":[198],"(TTS),":[200],"KazakhTTS2":[201],"delivered":[202],"most":[204],"natural":[205,233],"perceptual":[206],"quality":[207],"(DNSMOS":[208],"8.79\u20138.96),":[209],"TTS":[212],"best":[215],"spectral":[216],"(MCD":[218],"123.44\u2013117.11,":[219],"PESQ":[220,228],"1.14).":[221],"TurkicTTS":[222],"offered":[223],"reliable":[224],"intelligibility":[225],"(STOI":[226],"0.15,":[227],"1.16),":[229],"ElevenLabs":[231],"produced":[232],"but":[234],"less":[235],"spectrally":[236],"accurate":[237],"speech.":[238]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-11T00:00:00"}
