{"id":"https://openalex.org/W3196964833","doi":"https://doi.org/10.21437/interspeech.2021-471","title":"Hierarchical Context-Aware Transformers for Non-Autoregressive Text to Speech","display_name":"Hierarchical Context-Aware Transformers for Non-Autoregressive Text to Speech","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W3196964833","doi":"https://doi.org/10.21437/interspeech.2021-471","mag":"3196964833"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2021-471","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-471","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060429696","display_name":"Jae\u2010sung Bae","orcid":"https://orcid.org/0000-0002-8270-0072"},"institutions":[{"id":"https://openalex.org/I4210135449","display_name":"NCSOFT (South Korea)","ror":"https://ror.org/03q4mza74","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210135449"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jae-Sung Bae","raw_affiliation_strings":["Speech AI Lab, NCSOFT, Seongnam, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Speech AI Lab, NCSOFT, Seongnam, Republic of Korea","institution_ids":["https://openalex.org/I4210135449"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073688244","display_name":"Taejun Bak","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135449","display_name":"NCSOFT (South Korea)","ror":"https://ror.org/03q4mza74","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210135449"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Taejun Bak","raw_affiliation_strings":["Speech AI Lab, NCSOFT, Seongnam, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Speech AI Lab, NCSOFT, Seongnam, Republic of Korea","institution_ids":["https://openalex.org/I4210135449"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101800027","display_name":"Young-Sun Joo","orcid":"https://orcid.org/0000-0002-7428-5868"},"institutions":[{"id":"https://openalex.org/I4210135449","display_name":"NCSOFT (South Korea)","ror":"https://ror.org/03q4mza74","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210135449"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Young-Sun Joo","raw_affiliation_strings":["Speech AI Lab, NCSOFT, Seongnam, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Speech AI Lab, NCSOFT, Seongnam, Republic of Korea","institution_ids":["https://openalex.org/I4210135449"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052468556","display_name":"Hoon Young Cho","orcid":"https://orcid.org/0000-0002-6850-6580"},"institutions":[{"id":"https://openalex.org/I4210135449","display_name":"NCSOFT (South Korea)","ror":"https://ror.org/03q4mza74","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210135449"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hoon-Young Cho","raw_affiliation_strings":["Speech AI Lab, NCSOFT, Seongnam, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Speech AI Lab, NCSOFT, Seongnam, Republic of Korea","institution_ids":["https://openalex.org/I4210135449"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5060429696"],"corresponding_institution_ids":["https://openalex.org/I4210135449"],"apc_list":null,"apc_paid":null,"fwci":0.2719,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.63163039,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"3610","last_page":"3614"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8103982210159302},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.7360433340072632},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7181276082992554},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.7064412832260132},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6108670234680176},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5447692275047302},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3401006758213043},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08316254615783691},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.07819920778274536}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8103982210159302},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.7360433340072632},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7181276082992554},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.7064412832260132},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6108670234680176},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5447692275047302},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3401006758213043},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08316254615783691},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.07819920778274536},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2021-471","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-471","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W2107740512","https://openalex.org/W2107831318","https://openalex.org/W2184310502","https://openalex.org/W2749651610","https://openalex.org/W2766812927","https://openalex.org/W2794490148","https://openalex.org/W2896457183","https://openalex.org/W2903739847","https://openalex.org/W2919624000","https://openalex.org/W2946200149","https://openalex.org/W2946567085","https://openalex.org/W2952269766","https://openalex.org/W2964243274","https://openalex.org/W3015468748","https://openalex.org/W3016021263","https://openalex.org/W3016136182","https://openalex.org/W3033411150","https://openalex.org/W3094650042","https://openalex.org/W3095491807","https://openalex.org/W3097828251","https://openalex.org/W3150572638","https://openalex.org/W4287704453","https://openalex.org/W4289383906","https://openalex.org/W4323654151","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2171218219","https://openalex.org/W1972271943","https://openalex.org/W2150410159","https://openalex.org/W4327525404","https://openalex.org/W4287185323","https://openalex.org/W3150905897","https://openalex.org/W1520183331","https://openalex.org/W2734842993","https://openalex.org/W2168175994","https://openalex.org/W4281476908"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,53,77,116],"propose":[4,54],"methods":[5],"for":[6],"improving":[7],"the":[8,18,35,46,50,69,74,83,91,94,97,107,119,124,141,145],"modeling":[9,47,121],"performance":[10,48],"of":[11,29,49,71,123],"a":[12,55,87],"Transformer-based":[13],"non-autoregressive":[14],"textto-speech":[15],"(TNA-TTS)":[16],"model.Although":[17],"text":[19,32,59,75,88],"encoder":[20,60,84],"and":[21,27,33,61,130,136],"audio":[22,62,98,125],"decoder":[23,63,99,126],"handle":[24],"different":[25],"types":[26],"lengths":[28],"data":[30],"(i.e.,":[31],"audio),":[34],"TNA-TTS":[36,51],"models":[37],"are":[38,65],"not":[39],"designed":[40,66],"considering":[41],"these":[42],"variations.Therefore,":[43],"to":[44,67,93,104,113],"improve":[45,118],"model":[52],"hierarchical":[56],"Transformer":[57],"structure-based":[58],"that":[64,140],"accommodate":[68],"characteristics":[70],"each":[72,79],"module.For":[73],"encoder,":[76],"constrain":[78],"self-attention":[80,102],"layer":[81],"so":[82],"focuses":[85],"on":[86],"sequence":[89],"from":[90,111],"local":[92,114],"global":[95,112],"scope.Conversely,":[96],"constrains":[100],"its":[101],"layers":[103],"focus":[105],"in":[106],"reverse":[108],"direction,":[109],"i.e.,":[110],"scope.Additionally,":[115],"further":[117],"pitch":[120,132],"accuracy":[122],"by":[127],"providing":[128],"sentence":[129],"word-level":[131],"as":[133],"conditions.Various":[134],"objective":[135],"subjective":[137],"evaluations":[138],"verified":[139],"proposed":[142],"method":[143],"outperformed":[144],"baseline":[146],"TNA-TTS.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
