{"id":"https://openalex.org/W4319862711","doi":"https://doi.org/10.1109/slt54892.2023.10023322","title":"NIX-TTS: Lightweight and End-to-End Text-to-Speech Via Module-Wise Distillation","display_name":"NIX-TTS: Lightweight and End-to-End Text-to-Speech Via Module-Wise Distillation","publication_year":2023,"publication_date":"2023-01-09","ids":{"openalex":"https://openalex.org/W4319862711","doi":"https://doi.org/10.1109/slt54892.2023.10023322"},"language":"en","primary_location":{"id":"doi:10.1109/slt54892.2023.10023322","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10023322","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026413944","display_name":"Rendi Chevi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rendi Chevi","raw_affiliation_strings":["Kata.ai Research Team,ID","Kata.ai Research Team, ID"],"affiliations":[{"raw_affiliation_string":"Kata.ai Research Team,ID","institution_ids":[]},{"raw_affiliation_string":"Kata.ai Research Team, ID","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074108268","display_name":"Radityo Eko Prasojo","orcid":"https://orcid.org/0000-0002-5148-7299"},"institutions":[{"id":"https://openalex.org/I29617571","display_name":"University of Indonesia","ror":"https://ror.org/0116zj450","country_code":"ID","type":"education","lineage":["https://openalex.org/I29617571"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Radityo Eko Prasojo","raw_affiliation_strings":["Kata.ai Research Team,ID","Universitas Indonesia, ID","Kata.ai Research Team, ID"],"affiliations":[{"raw_affiliation_string":"Kata.ai Research Team,ID","institution_ids":[]},{"raw_affiliation_string":"Universitas Indonesia, ID","institution_ids":["https://openalex.org/I29617571"]},{"raw_affiliation_string":"Kata.ai Research Team, ID","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112924039","display_name":"Alham Fikri Aji","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123934","display_name":"Amazon (United Kingdom)","ror":"https://ror.org/02xey9634","country_code":"GB","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210123934"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Alham Fikri Aji","raw_affiliation_strings":["Amazon,UK","Amazon, UK"],"affiliations":[{"raw_affiliation_string":"Amazon,UK","institution_ids":["https://openalex.org/I4210123934"]},{"raw_affiliation_string":"Amazon, UK","institution_ids":["https://openalex.org/I4210123934"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038296765","display_name":"Andros Tjandra","orcid":"https://orcid.org/0000-0003-1246-5908"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andros Tjandra","raw_affiliation_strings":["Meta AI,USA","Meta AI, USA"],"affiliations":[{"raw_affiliation_string":"Meta AI,USA","institution_ids":[]},{"raw_affiliation_string":"Meta AI, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040108974","display_name":"Sakriani Sakti","orcid":"https://orcid.org/0000-0001-5509-8963"},"institutions":[{"id":"https://openalex.org/I177738480","display_name":"Japan Advanced Institute of Science and Technology","ror":"https://ror.org/03frj4r98","country_code":"JP","type":"education","lineage":["https://openalex.org/I177738480"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sakriani Sakti","raw_affiliation_strings":["Japan Advanced Institute of Science and Technology,JP","Japan Advanced Institute of Science and Technology, JP"],"affiliations":[{"raw_affiliation_string":"Japan Advanced Institute of Science and Technology,JP","institution_ids":["https://openalex.org/I177738480"]},{"raw_affiliation_string":"Japan Advanced Institute of Science and Technology, JP","institution_ids":["https://openalex.org/I177738480"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5026413944"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3391,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.45549193,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"970","last_page":"976"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.756158709526062},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7050551176071167},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.5582853555679321},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5298898816108704},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5129077434539795},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.48933786153793335},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.4837751090526581},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.43734824657440186},{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.41810211539268494},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4149259328842163},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32693424820899963},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1350349485874176},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.0808749794960022},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.06931045651435852}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.756158709526062},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7050551176071167},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5582853555679321},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5298898816108704},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5129077434539795},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.48933786153793335},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.4837751090526581},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.43734824657440186},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.41810211539268494},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4149259328842163},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32693424820899963},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1350349485874176},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0808749794960022},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.06931045651435852},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt54892.2023.10023322","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10023322","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8600000143051147}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W1821462560","https://openalex.org/W2531409750","https://openalex.org/W2593414223","https://openalex.org/W2908510526","https://openalex.org/W2963300588","https://openalex.org/W2963975282","https://openalex.org/W2964167449","https://openalex.org/W2964243274","https://openalex.org/W3000389243","https://openalex.org/W3026874504","https://openalex.org/W3033411150","https://openalex.org/W3036601975","https://openalex.org/W3092028330","https://openalex.org/W3095883095","https://openalex.org/W3125481789","https://openalex.org/W3156871171","https://openalex.org/W3160919572","https://openalex.org/W3169905056","https://openalex.org/W3194000401","https://openalex.org/W3196001064","https://openalex.org/W3197649190","https://openalex.org/W3203852729","https://openalex.org/W4287694050","https://openalex.org/W4287761884","https://openalex.org/W4294619240","https://openalex.org/W4297798428","https://openalex.org/W4320930577","https://openalex.org/W4385245566","https://openalex.org/W6638523607","https://openalex.org/W6687506355","https://openalex.org/W6714644935","https://openalex.org/W6739901393","https://openalex.org/W6746023985","https://openalex.org/W6753855596","https://openalex.org/W6757817989","https://openalex.org/W6771763809","https://openalex.org/W6777694618","https://openalex.org/W6778823374","https://openalex.org/W6779337556","https://openalex.org/W6780218876","https://openalex.org/W6781251213","https://openalex.org/W6783867762","https://openalex.org/W6789577077","https://openalex.org/W6794528836","https://openalex.org/W6796464841","https://openalex.org/W6800389019","https://openalex.org/W6800393981","https://openalex.org/W6802531207","https://openalex.org/W6843673214"],"related_works":["https://openalex.org/W2079655441","https://openalex.org/W2912293245","https://openalex.org/W1604114751","https://openalex.org/W4252942110","https://openalex.org/W2032941915","https://openalex.org/W2075706796","https://openalex.org/W2549308614","https://openalex.org/W3018584924","https://openalex.org/W2023694213","https://openalex.org/W2958354987"],"abstract_inverted_index":{"Several":[0],"solutions":[1],"for":[2],"lightweight":[3,37],"TTS":[4,38,52,75],"have":[5],"shown":[6],"promising":[7],"results.":[8],"Still,":[9],"they":[10],"either":[11],"rely":[12],"on":[13,115],"a":[14,23,36,44,126],"hand-crafted":[15],"design":[16],"that":[17],"reaches":[18],"non-optimum":[19],"size":[20],"or":[21,97],"use":[22],"neural":[24],"architecture":[25],"search":[26],"but":[27],"often":[28],"suffer":[29],"training":[30],"costs.":[31],"We":[32,137],"present":[33],"Nix-":[34],"TTS,":[35],"achieved":[39],"via":[40],"knowledge":[41],"distillation":[42,64],"to":[43,65,99,133],"high-quality":[45],"yet":[46,88],"large-sized,":[47],"non-autoregressive,":[48],"and":[49,62,68,83,111,118,123,130,141],"end-to-end":[50,84],"(vocoder-free)":[51],"teacher":[53,104,135],"model.":[54,136],"Specifically,":[55],"we":[56],"offer":[57],"module-wise":[58],"distillation,":[59],"enabling":[60],"flexible":[61],"independent":[63],"the":[66,77,86,103,134],"encoder":[67],"decoder":[69],"module.":[70],"The":[71],"resulting":[72],"Nix":[73],"-":[74],"inherited":[76],"advantageous":[78],"properties":[79],"of":[80,102,144],"being":[81],"non-autoregressive":[82],"from":[85],"teacher,":[87],"significantly":[89],"smaller":[90],"in":[91],"size,":[92],"with":[93],"only":[94],"5.23M":[95],"parameters":[96],"up":[98],"89.34%":[100],"reduction":[101],"model;":[105],"it":[106],"also":[107],"achieves":[108],"over":[109],"3.04x":[110],"8.36x":[112],"inference":[113],"speedup":[114],"Intel-i7":[116],"CPU":[117],"Raspberry":[119],"Pi":[120],"3B":[121],"respectively":[122],"still":[124],"retains":[125],"fair":[127],"voice":[128],"naturalness":[129],"intelligibility":[131],"compared":[132],"provide":[138],"pretrained":[139],"models":[140],"audio":[142],"samples":[143],"Nix-TTS":[145],"<sup":[146,149],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[147,150],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[148,151],"https://github.com/rendchevi/nix-tts.":[152]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
