{"id":"https://openalex.org/W4313061287","doi":"https://doi.org/10.1145/3561877.3561887","title":"Bidirectional Decoding Tacotron for Attention Based Neural Speech Synthesis","display_name":"Bidirectional Decoding Tacotron for Attention Based Neural Speech Synthesis","publication_year":2022,"publication_date":"2022-08-26","ids":{"openalex":"https://openalex.org/W4313061287","doi":"https://doi.org/10.1145/3561877.3561887"},"language":"en","primary_location":{"id":"doi:10.1145/3561877.3561887","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3561877.3561887","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 the 5th International Conference on Information Science and Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100695572","display_name":"Wei Zhao","orcid":"https://orcid.org/0000-0002-7346-5947"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wei Zhao","raw_affiliation_strings":["College of Electrical Engineering, Zhejiang University, China"],"affiliations":[{"raw_affiliation_string":"College of Electrical Engineering, Zhejiang University, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101747577","display_name":"Li Xu","orcid":"https://orcid.org/0000-0002-1693-0857"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Xu","raw_affiliation_strings":["College of Electrical Engineering, Zhejiang University, China"],"affiliations":[{"raw_affiliation_string":"College of Electrical Engineering, Zhejiang University, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100695572"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14454898,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"64","last_page":"69"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.9009475708007812},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8057610392570496},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.677436888217926},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6100835204124451},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5913572907447815},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4966633915901184},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4895502030849457},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4601787328720093},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2892846465110779},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.269554078578949},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.07331472635269165}],"concepts":[{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.9009475708007812},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8057610392570496},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.677436888217926},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6100835204124451},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5913572907447815},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4966633915901184},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4895502030849457},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4601787328720093},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2892846465110779},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.269554078578949},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.07331472635269165},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3561877.3561887","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3561877.3561887","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 the 5th International Conference on Information Science and Systems","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7799999713897705,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1522301498","https://openalex.org/W2129142580","https://openalex.org/W2133564696","https://openalex.org/W2460550122","https://openalex.org/W2466062786","https://openalex.org/W2592930876","https://openalex.org/W2612690371","https://openalex.org/W2730845691","https://openalex.org/W2767052532","https://openalex.org/W2777302760","https://openalex.org/W2783493343","https://openalex.org/W2792995953","https://openalex.org/W2898847420","https://openalex.org/W2935811960","https://openalex.org/W2946200149","https://openalex.org/W2963965917","https://openalex.org/W2982055294","https://openalex.org/W3033411150","https://openalex.org/W3097538987","https://openalex.org/W6736996214","https://openalex.org/W6739901393","https://openalex.org/W6757079273","https://openalex.org/W6917585676"],"related_works":["https://openalex.org/W2368824897","https://openalex.org/W1508050556","https://openalex.org/W1910862367","https://openalex.org/W1950940422","https://openalex.org/W2379365082","https://openalex.org/W2370747590","https://openalex.org/W4283822356","https://openalex.org/W2030109976","https://openalex.org/W2369260257","https://openalex.org/W2129146436"],"abstract_inverted_index":{"Attention-based":[0],"neural":[1],"text-to-speech":[2],"(TTS)":[3],"has":[4],"become":[5],"increasingly":[6],"popular":[7],"because":[8],"of":[9,42,89,151],"its":[10],"end-to-end":[11],"network":[12,61],"architecture":[13],"and":[14,109,129,139],"impressive":[15],"performance":[16],"comparable":[17],"to":[18,36,48,76,82,102,105,132,147],"human":[19],"recordings.":[20],"However,":[21],"existing":[22],"approaches":[23],"usually":[24],"adopt":[25],"a":[26,56,73],"unidirectional":[27],"decoding":[28,58,123],"framework":[29],"generating":[30],"the":[31,52,64,78,86,90,94,98,110,115,134,149],"target":[32],"spectrum":[33,79,99],"from":[34,46,80,100,114],"left":[35,101],"right,":[37,103],"which":[38],"cannot":[39],"take":[40],"advantage":[41],"reverse":[43],"target-side":[44],"contexts":[45],"right":[47,81],"left.":[49],"To":[50],"mitigate":[51],"problem,":[53],"we":[54],"present":[55],"bidirectional":[57,122],"speech":[59],"synthesis":[60],"based":[62],"on":[63,85,142],"well-known":[65],"Tacotron2.":[66],"In":[67],"particular,":[68],"our":[69,121,152],"model":[70],"first":[71],"employs":[72],"backward":[74,116,128],"decoder":[75,96],"predict":[77],"left,":[83],"conditioned":[84],"output":[87],"states":[88,113],"text":[91],"encoder.":[92],"Then,":[93],"forward":[95,130],"generates":[97],"attending":[104],"both":[106,127],"encoder":[107],"outputs":[108],"context":[111],"hidden":[112],"decoder.":[117],"With":[118],"this":[119],"architecture,":[120],"Tacotron2":[124],"can":[125],"exploit":[126],"information":[131],"promote":[133],"performance.":[135],"Experiments":[136],"with":[137],"objective":[138],"subjective":[140],"evaluations":[141],"LJSpeech":[143],"have":[144],"been":[145],"conducted":[146],"demonstrate":[148],"effectiveness":[150],"proposed":[153],"method.":[154]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
