{"id":"https://openalex.org/W4401338106","doi":"https://doi.org/10.1109/isivc61350.2024.10623342","title":"End-to-End Text-to-Speech Systems in Arabic: A Comparative Study","display_name":"End-to-End Text-to-Speech Systems in Arabic: A Comparative Study","publication_year":2024,"publication_date":"2024-05-21","ids":{"openalex":"https://openalex.org/W4401338106","doi":"https://doi.org/10.1109/isivc61350.2024.10623342"},"language":"en","primary_location":{"id":"doi:10.1109/isivc61350.2024.10623342","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/isivc61350.2024.10623342","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 12th International Symposium on Signal, Image, Video and Communications (ISIVC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048781950","display_name":"Mayda Alrige","orcid":"https://orcid.org/0000-0002-1315-7053"},"institutions":[{"id":"https://openalex.org/I185163786","display_name":"King Abdulaziz University","ror":"https://ror.org/02ma4wv74","country_code":"SA","type":"education","lineage":["https://openalex.org/I185163786"]}],"countries":["SA"],"is_corresponding":true,"raw_author_name":"Mayda Alrige","raw_affiliation_strings":["King Abdulaziz Univercity,Information Systems Department,Jeddah,Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"King Abdulaziz Univercity,Information Systems Department,Jeddah,Saudi Arabia","institution_ids":["https://openalex.org/I185163786"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019672674","display_name":"Omaima Almatrafi","orcid":"https://orcid.org/0000-0003-2105-2275"},"institutions":[{"id":"https://openalex.org/I185163786","display_name":"King Abdulaziz University","ror":"https://ror.org/02ma4wv74","country_code":"SA","type":"education","lineage":["https://openalex.org/I185163786"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Omaima Almatrafi","raw_affiliation_strings":["King Abdulaziz Univercity,Information Systems Department,Jeddah,Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"King Abdulaziz Univercity,Information Systems Department,Jeddah,Saudi Arabia","institution_ids":["https://openalex.org/I185163786"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068148507","display_name":"Riad Alharbey","orcid":"https://orcid.org/0000-0003-4968-950X"},"institutions":[{"id":"https://openalex.org/I4210099699","display_name":"Jeddah University","ror":"https://ror.org/015ya8798","country_code":"SA","type":"education","lineage":["https://openalex.org/I4210099699"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Riad Alharbey","raw_affiliation_strings":["Univercity of Jeddah,Computer Sceince and engineering,Jeddah,Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"Univercity of Jeddah,Computer Sceince and engineering,Jeddah,Saudi Arabia","institution_ids":["https://openalex.org/I4210099699"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113321554","display_name":"Mashail Alotaibi","orcid":null},"institutions":[{"id":"https://openalex.org/I185163786","display_name":"King Abdulaziz University","ror":"https://ror.org/02ma4wv74","country_code":"SA","type":"education","lineage":["https://openalex.org/I185163786"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Mashail Alotaibi","raw_affiliation_strings":["King Abdulaziz Univercity,Information Systems Department,Jeddah,Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"King Abdulaziz Univercity,Information Systems Department,Jeddah,Saudi Arabia","institution_ids":["https://openalex.org/I185163786"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106319755","display_name":"Maryah Almarri","orcid":null},"institutions":[{"id":"https://openalex.org/I185163786","display_name":"King Abdulaziz University","ror":"https://ror.org/02ma4wv74","country_code":"SA","type":"education","lineage":["https://openalex.org/I185163786"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Maryah Almarri","raw_affiliation_strings":["King Abdulaziz Univercity,Information Systems Department,Jeddah,Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"King Abdulaziz Univercity,Information Systems Department,Jeddah,Saudi Arabia","institution_ids":["https://openalex.org/I185163786"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034662465","display_name":"Lujain Alghamdi","orcid":null},"institutions":[{"id":"https://openalex.org/I185163786","display_name":"King Abdulaziz University","ror":"https://ror.org/02ma4wv74","country_code":"SA","type":"education","lineage":["https://openalex.org/I185163786"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Lujain Alghamdi","raw_affiliation_strings":["King Abdulaziz Univercity,Information Systems Department,Jeddah,Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"King Abdulaziz Univercity,Information Systems Department,Jeddah,Saudi Arabia","institution_ids":["https://openalex.org/I185163786"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5048781950"],"corresponding_institution_ids":["https://openalex.org/I185163786"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.10390585,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7037972211837769},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.6995623707771301},{"id":"https://openalex.org/keywords/speech-translation","display_name":"Speech translation","score":0.5707904696464539},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.5529463887214661},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.548021137714386},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5039319396018982},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4814555048942566},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.39042001962661743},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38681456446647644},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.11432746052742004},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.06610897183418274}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7037972211837769},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.6995623707771301},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.5707904696464539},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.5529463887214661},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.548021137714386},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5039319396018982},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4814555048942566},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.39042001962661743},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38681456446647644},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.11432746052742004},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.06610897183418274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isivc61350.2024.10623342","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/isivc61350.2024.10623342","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 12th International Symposium on Signal, Image, Video and Communications (ISIVC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5299999713897705,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2048350934","https://openalex.org/W2102737569","https://openalex.org/W2129142580","https://openalex.org/W2150658333","https://openalex.org/W2157075408","https://openalex.org/W2428180336","https://openalex.org/W2907920457","https://openalex.org/W2963300588","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2972882294","https://openalex.org/W2975384707","https://openalex.org/W2976159681","https://openalex.org/W3090831112","https://openalex.org/W3150282322","https://openalex.org/W4200043252","https://openalex.org/W4239803624","https://openalex.org/W4280635064","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6784545093"],"related_works":["https://openalex.org/W2990025607","https://openalex.org/W3045103338","https://openalex.org/W3007142233","https://openalex.org/W2338806053","https://openalex.org/W4385571610","https://openalex.org/W123774389","https://openalex.org/W39235475","https://openalex.org/W4399356803","https://openalex.org/W3177132412","https://openalex.org/W3198731777"],"abstract_inverted_index":{"This":[0],"study":[1],"compares":[2],"the":[3,12,17,25,28,52,55,133],"performance":[4],"of":[5,46,70,72,86,88],"two":[6],"popular":[7],"end-to-end":[8,102],"text-to-speech":[9],"(TTS)":[10],"systems,":[11],"Tacotron":[13,18,62,80],"and":[14,27,43,49],"its":[15],"successor,":[16],"2,":[19],"each":[20],"used":[21],"with":[22,63,82],"rival":[23],"vocoders,":[24],"WaveNet":[26,64],"WaveGlow,":[29],"respectively.":[30],"We":[31,90,110],"conducted":[32],"experiments":[33],"on":[34,67],"Nawar":[35],"Halabi\u2019s":[36],"dataset,":[37],"which":[38],"contains":[39],"approximately":[40],"three":[41],"hours":[42],"forty-two":[44],"minutes":[45],"Arabic":[47],"speech":[48],"qualitatively":[50],"evaluated":[51],"models":[53],"using":[54],"mean":[56,76],"opinion":[57],"score":[58,74],"(MOS).":[59],"The":[60],"original":[61],"achieved":[65],"4.2":[66],"a":[68,73,97,118],"scale":[69],"5":[71],"for":[75,104,123],"opinion,":[77],"thus":[78],"outperforming":[79],"2":[81],"WaveGlow":[83],"in":[84,100,114],"terms":[85],"naturalness":[87],"speech.":[89],"found":[91],"that":[92],"crafted":[93],"text":[94,115],"analysis":[95],"is":[96],"crucial":[98],"step":[99,120],"improving":[101],"TTS":[103],"complex":[105],"languages,":[106],"such":[107,126],"as":[108,117,127,129,131],"Arabic.":[109],"recommend":[111],"investing":[112],"more":[113],"preprocessing":[116],"prior":[119],"to":[121],"accounting":[122],"language-specific":[124],"features":[125],"diacritic,":[128],"well":[130],"enhancing":[132],"voice":[134],"quality":[135],"produced":[136],"through":[137],"prosody":[138],"modeling.":[139]},"counts_by_year":[],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
