{"id":"https://openalex.org/W4388692791","doi":"https://doi.org/10.1109/sped59241.2023.10314948","title":"Advancing Limited Data Text-to-Speech Synthesis: Non-Autoregressive Transformer for High-Quality Parallel Synthesis","display_name":"Advancing Limited Data Text-to-Speech Synthesis: Non-Autoregressive Transformer for High-Quality Parallel Synthesis","publication_year":2023,"publication_date":"2023-10-25","ids":{"openalex":"https://openalex.org/W4388692791","doi":"https://doi.org/10.1109/sped59241.2023.10314948"},"language":"en","primary_location":{"id":"doi:10.1109/sped59241.2023.10314948","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sped59241.2023.10314948","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Conference on Speech Technology and Human-Computer Dialogue (SpeD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086670617","display_name":"Mohammed Salah Al-Radhi","orcid":"https://orcid.org/0000-0003-3094-6916"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":true,"raw_author_name":"Mohammed Salah Al-Radhi","raw_affiliation_strings":["Budapest University of Technology and Economics,Department of Telecommunications and Media Informatics,Budapest,Hungary","Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary"],"affiliations":[{"raw_affiliation_string":"Budapest University of Technology and Economics,Department of Telecommunications and Media Informatics,Budapest,Hungary","institution_ids":["https://openalex.org/I29770179"]},{"raw_affiliation_string":"Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021793941","display_name":"Omnia Ibrahim","orcid":"https://orcid.org/0000-0002-3649-7376"},"institutions":[{"id":"https://openalex.org/I91712215","display_name":"Saarland University","ror":"https://ror.org/01jdpyv68","country_code":"DE","type":"education","lineage":["https://openalex.org/I91712215"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Omnia Ibrahim","raw_affiliation_strings":["Saarland University,Department of Language Science and Technology,Germany","Department of Language Science and Technology, Saarland University, Germany"],"affiliations":[{"raw_affiliation_string":"Saarland University,Department of Language Science and Technology,Germany","institution_ids":["https://openalex.org/I91712215"]},{"raw_affiliation_string":"Department of Language Science and Technology, Saarland University, Germany","institution_ids":["https://openalex.org/I91712215"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007024435","display_name":"Ali Raheem Mandeel","orcid":"https://orcid.org/0000-0003-4188-2196"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Ali Raheem Mandeel","raw_affiliation_strings":["Budapest University of Technology and Economics,Department of Telecommunications and Media Informatics,Budapest,Hungary","Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary"],"affiliations":[{"raw_affiliation_string":"Budapest University of Technology and Economics,Department of Telecommunications and Media Informatics,Budapest,Hungary","institution_ids":["https://openalex.org/I29770179"]},{"raw_affiliation_string":"Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016031960","display_name":"Tam\u00e1s G\u00e1bor Csap\u00f3","orcid":"https://orcid.org/0000-0003-4375-7524"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Tam\u00e1s G\u00e1bor Csap\u00f3","raw_affiliation_strings":["Budapest University of Technology and Economics,Department of Telecommunications and Media Informatics,Budapest,Hungary","Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary"],"affiliations":[{"raw_affiliation_string":"Budapest University of Technology and Economics,Department of Telecommunications and Media Informatics,Budapest,Hungary","institution_ids":["https://openalex.org/I29770179"]},{"raw_affiliation_string":"Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069988513","display_name":"G\u00e9za N\u00e9meth","orcid":"https://orcid.org/0000-0002-2311-4858"},"institutions":[{"id":"https://openalex.org/I29770179","display_name":"Budapest University of Technology and Economics","ror":"https://ror.org/02w42ss30","country_code":"HU","type":"education","lineage":["https://openalex.org/I29770179"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"G\u00e9za N\u00e9meth","raw_affiliation_strings":["Budapest University of Technology and Economics,Department of Telecommunications and Media Informatics,Budapest,Hungary","Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary"],"affiliations":[{"raw_affiliation_string":"Budapest University of Technology and Economics,Department of Telecommunications and Media Informatics,Budapest,Hungary","institution_ids":["https://openalex.org/I29770179"]},{"raw_affiliation_string":"Department of Telecommunications and Media Informatics, Budapest University of Technology and Economics, Budapest, Hungary","institution_ids":["https://openalex.org/I29770179"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5086670617"],"corresponding_institution_ids":["https://openalex.org/I29770179"],"apc_list":null,"apc_paid":null,"fwci":0.1748,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.57605869,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"152","last_page":"157"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8095065355300903},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.7422131896018982},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.7012947201728821},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7002602219581604},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6956402063369751},{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.4830227792263031},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4719121754169464},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.41718196868896484},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38326388597488403}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8095065355300903},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.7422131896018982},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.7012947201728821},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7002602219581604},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6956402063369751},{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.4830227792263031},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4719121754169464},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.41718196868896484},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38326388597488403},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sped59241.2023.10314948","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sped59241.2023.10314948","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Conference on Speech Technology and Human-Computer Dialogue (SpeD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1998648683","https://openalex.org/W2402036432","https://openalex.org/W2531638282","https://openalex.org/W2609357228","https://openalex.org/W2936103087","https://openalex.org/W2946200149","https://openalex.org/W2963035245","https://openalex.org/W2963300588","https://openalex.org/W2964243274","https://openalex.org/W2969521066","https://openalex.org/W2972394484","https://openalex.org/W2972831865","https://openalex.org/W2984229517","https://openalex.org/W3015282541","https://openalex.org/W3015338123","https://openalex.org/W3090831112","https://openalex.org/W3095545636","https://openalex.org/W3150282322","https://openalex.org/W3198332267","https://openalex.org/W3213544594","https://openalex.org/W4210849413","https://openalex.org/W4385245566","https://openalex.org/W4387941753","https://openalex.org/W4395961568","https://openalex.org/W6623517193","https://openalex.org/W6720802662","https://openalex.org/W6763832098","https://openalex.org/W6778823374","https://openalex.org/W6780218876","https://openalex.org/W6783867762","https://openalex.org/W6802379250"],"related_works":["https://openalex.org/W2183593636","https://openalex.org/W2350724007","https://openalex.org/W2355751417","https://openalex.org/W2423284978","https://openalex.org/W2053269318","https://openalex.org/W2546021431","https://openalex.org/W2581127593","https://openalex.org/W2904846757","https://openalex.org/W175280642","https://openalex.org/W2688184458"],"abstract_inverted_index":{"Despite":[0],"the":[1,89],"impressive":[2],"results":[3],"achieved":[4],"by":[5,123],"autoregressive":[6],"generative":[7],"models":[8,30],"like":[9,31,36,94,110],"Tacotron2":[10],"in":[11],"end-to-end":[12],"speech":[13,93,111],"synthesis,":[14],"their":[15],"slow":[16],"inference":[17],"speed":[18],"remains":[19],"a":[20,51,59,80],"significant":[21],"drawback.":[22],"To":[23],"overcome":[24],"this":[25,47],"limitation,":[26],"non-autoregressive":[27,81],"Text-to-Speech":[28],"(TTS)":[29],"FastSpeech2":[32],"and":[33,72,75,98,116,126],"neural":[34],"vocoders":[35],"AutoVocoder,":[37],"have":[38],"emerged":[39],"as":[40],"faster":[41],"alternatives":[42],"with":[43,101],"comparable":[44],"quality.":[45],"In":[46],"work,":[48],"we":[49],"present":[50],"novel":[52],"lightweight":[53],"Arabic":[54],"TTS":[55],"system":[56,85],"based":[57,78],"on":[58,79],"transformer":[60],"architecture":[61],"that":[62],"utilizes":[63],"fewer":[64],"parameters":[65],"than":[66],"Tacotron2.":[67],"Our":[68,84,119],"approach":[69],"combines":[70],"convolutional":[71],"transformer-based":[73],"blocks":[74],"is":[76,121],"fully":[77],"training":[82],"framework.":[83],"can":[86],"accurately":[87],"reproduce":[88],"characteristics":[90],"of":[91],"natural":[92],"tone,":[95],"pitch,":[96],"timing":[97],"word":[99],"pronunciation":[100],"state-of-the-art":[102],"quality,":[103],"making":[104],"it":[105],"suitable":[106],"for":[107,113],"practical":[108],"applications":[109],"synthesis":[112],"low-resource":[114],"languages":[115],"conversational":[117],"agents.":[118],"method":[120],"validated":[122],"acoustic":[124],"analysis":[125],"subjective":[127],"listening":[128],"tests.":[129]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
