{"id":"https://openalex.org/W3203394408","doi":"https://doi.org/10.1109/icassp43922.2022.9746107","title":"Mixer-TTS: Non-Autoregressive, Fast and Compact Text-to-Speech Model Conditioned on Language Model Embeddings","display_name":"Mixer-TTS: Non-Autoregressive, Fast and Compact Text-to-Speech Model Conditioned on Language Model Embeddings","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3203394408","doi":"https://doi.org/10.1109/icassp43922.2022.9746107","mag":"3203394408"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746107","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746107","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082432290","display_name":"Oktai Tatanov","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Oktai Tatanov","raw_affiliation_strings":["NVIDIA,Santa Clara","NVIDIA, Santa Clara"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"NVIDIA, Santa Clara","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064449185","display_name":"Stanislav Beliaev","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stanislav Beliaev","raw_affiliation_strings":["NVIDIA,Santa Clara","NVIDIA, Santa Clara"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"NVIDIA, Santa Clara","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032957280","display_name":"Boris Ginsburg","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Boris Ginsburg","raw_affiliation_strings":["NVIDIA,Santa Clara","NVIDIA, Santa Clara"],"affiliations":[{"raw_affiliation_string":"NVIDIA,Santa Clara","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"NVIDIA, Santa Clara","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5082432290"],"corresponding_institution_ids":["https://openalex.org/I4210127875"],"apc_list":null,"apc_paid":null,"fwci":1.1481,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.79238883,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"7482","last_page":"7486"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7330256104469299},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7218596339225769},{"id":"https://openalex.org/keywords/mean-opinion-score","display_name":"Mean opinion score","score":0.702955961227417},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.6830393075942993},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6806543469429016},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6321067810058594},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.6181667447090149},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6153578758239746},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4813402593135834},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.42927587032318115},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11088263988494873},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1048058271408081},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.08033475279808044}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7330256104469299},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7218596339225769},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.702955961227417},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.6830393075942993},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6806543469429016},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6321067810058594},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.6181667447090149},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6153578758239746},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4813402593135834},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42927587032318115},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11088263988494873},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1048058271408081},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.08033475279808044},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746107","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746107","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4099999964237213,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W2191779130","https://openalex.org/W2591927543","https://openalex.org/W2619368999","https://openalex.org/W2763421725","https://openalex.org/W2766812927","https://openalex.org/W2896457183","https://openalex.org/W2945785363","https://openalex.org/W2946200149","https://openalex.org/W2963341956","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2970730223","https://openalex.org/W2973217961","https://openalex.org/W2974231335","https://openalex.org/W2979826702","https://openalex.org/W2995435108","https://openalex.org/W2996428491","https://openalex.org/W3026874504","https://openalex.org/W3033411150","https://openalex.org/W3034949308","https://openalex.org/W3092028330","https://openalex.org/W3098824823","https://openalex.org/W3130016944","https://openalex.org/W3150572638","https://openalex.org/W3156871171","https://openalex.org/W3157506437","https://openalex.org/W3163339651","https://openalex.org/W3194000401","https://openalex.org/W3196001064","https://openalex.org/W4285719527","https://openalex.org/W6734815144","https://openalex.org/W6745245109","https://openalex.org/W6745697700","https://openalex.org/W6755207826","https://openalex.org/W6762287338","https://openalex.org/W6763832098","https://openalex.org/W6765987481","https://openalex.org/W6767671539","https://openalex.org/W6768021236","https://openalex.org/W6777694618","https://openalex.org/W6778823374","https://openalex.org/W6783867762","https://openalex.org/W6785503291","https://openalex.org/W6794528836","https://openalex.org/W6795140394","https://openalex.org/W6800389019","https://openalex.org/W6800393981","https://openalex.org/W6917585676"],"related_works":["https://openalex.org/W2998781440","https://openalex.org/W3164858600","https://openalex.org/W3109498233","https://openalex.org/W2031768607","https://openalex.org/W2885339073","https://openalex.org/W4297798732","https://openalex.org/W4200068392","https://openalex.org/W4205278983","https://openalex.org/W3161890269","https://openalex.org/W4298324454"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"Mixer-TTS,":[3],"a":[4,56,67,79,90],"non-autoregressive":[5],"model":[6,11],"for":[7,19],"mel-spectrogram":[8],"generation.":[9],"The":[10,22],"is":[12],"based":[13],"on":[14],"the":[15,31,42,47,103],"MLP-Mixer":[16],"architecture":[17],"adapted":[18],"speech":[20,99],"synthesis.":[21],"basic":[23,43],"Mixer-TTS":[24,61],"contains":[25],"pitch":[26],"and":[27,62,74,95],"duration":[28],"predictors,":[29],"with":[30,35,105],"latter":[32],"being":[33],"trained":[34],"an":[36],"unsupervised":[37],"TTS":[38],"alignment":[39],"framework.":[40],"Alongside":[41],"model,":[44],"we":[45],"propose":[46],"extended":[48,64],"version":[49,65],"which":[50],"additionally":[51],"uses":[52],"token":[53],"embeddings":[54],"from":[55],"pre-trained":[57],"language":[58],"model.":[59],"Basic":[60],"its":[63],"achieve":[66],"mean":[68],"opinion":[69],"score":[70],"(MOS)":[71],"of":[72,81,83,93],"4.05":[73],"4.11,":[75],"respectively,":[76],"compared":[77,101],"to":[78,102],"MOS":[80],"4.27":[82],"original":[84],"LJSpeech":[85],"samples.":[86],"Both":[87],"versions":[88],"have":[89],"small":[91],"number":[92],"parameters":[94],"enable":[96],"much":[97],"faster":[98],"synthesis":[100],"models":[104],"similar":[106],"quality.":[107]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2}],"updated_date":"2026-04-04T06:10:10.580331","created_date":"2025-10-10T00:00:00"}
