{"id":"https://openalex.org/W3114579299","doi":"https://doi.org/10.1109/lsp.2021.3073869","title":"Incremental Text-to-Speech Synthesis Using Pseudo Lookahead With Large Pretrained Language Model","display_name":"Incremental Text-to-Speech Synthesis Using Pseudo Lookahead With Large Pretrained Language Model","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3114579299","doi":"https://doi.org/10.1109/lsp.2021.3073869","mag":"3114579299"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2021.3073869","is_oa":true,"landing_page_url":"https://doi.org/10.1109/lsp.2021.3073869","pdf_url":"https://ieeexplore.ieee.org/ielx7/97/9325893/09406329.pdf","source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ieeexplore.ieee.org/ielx7/97/9325893/09406329.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Takaaki Saeki","orcid":"https://orcid.org/0000-0001-6003-768X"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takaaki Saeki","raw_affiliation_strings":["Graduate School of Information Science and Technology, University of Tokyo, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0001-6003-768X","affiliations":[{"raw_affiliation_string":"Graduate School of Information Science and Technology, University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Shinnosuke Takamichi","orcid":"https://orcid.org/0000-0003-0520-7847"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinnosuke Takamichi","raw_affiliation_strings":["Graduate School of Information Science and Technology, University of Tokyo, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0003-0520-7847","affiliations":[{"raw_affiliation_string":"Graduate School of Information Science and Technology, University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":null,"display_name":"Hiroshi Saruwatari","orcid":"https://orcid.org/0000-0003-0876-5617"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroshi Saruwatari","raw_affiliation_strings":["Graduate School of Information Science and Technology, University of Tokyo, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0003-0876-5617","affiliations":[{"raw_affiliation_string":"Graduate School of Information Science and Technology, University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":1.6798,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86526132,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"28","issue":null,"first_page":"857","last_page":"861"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6700000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6700000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.07010000199079514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.052299998700618744,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.7924000024795532},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5936999917030334},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5601000189781189},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5285999774932861},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5196999907493591},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.40369999408721924}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8355000019073486},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.7924000024795532},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5936999917030334},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5601000189781189},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5540000200271606},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5285999774932861},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5196999907493591},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5066999793052673},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4796000123023987},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.40369999408721924},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.3910999894142151},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.3871000111103058},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3488999903202057},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.2992999851703644},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2678999900817871}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lsp.2021.3073869","is_oa":true,"landing_page_url":"https://doi.org/10.1109/lsp.2021.3073869","pdf_url":"https://ieeexplore.ieee.org/ielx7/97/9325893/09406329.pdf","source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2012.12612","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.12612","pdf_url":"https://arxiv.org/pdf/2012.12612","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1109/lsp.2021.3073869","is_oa":true,"landing_page_url":"https://doi.org/10.1109/lsp.2021.3073869","pdf_url":"https://ieeexplore.ieee.org/ielx7/97/9325893/09406329.pdf","source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1688345242","display_name":null,"funder_award_id":"17H06101","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G8935930414","display_name":null,"funder_award_id":"19H01116","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3114579299.pdf","grobid_xml":"https://content.openalex.org/works/W3114579299.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1975163393","https://openalex.org/W2129142580","https://openalex.org/W2141708418","https://openalex.org/W2526425061","https://openalex.org/W2903739847","https://openalex.org/W2962780374","https://openalex.org/W2963096510","https://openalex.org/W2963300588","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2972895078","https://openalex.org/W3048023795","https://openalex.org/W3083224111","https://openalex.org/W6611766843","https://openalex.org/W6631190155","https://openalex.org/W6636915900","https://openalex.org/W6675380101","https://openalex.org/W6679262520","https://openalex.org/W6750489868","https://openalex.org/W6770069697","https://openalex.org/W6782348501","https://openalex.org/W6785521828","https://openalex.org/W6917585676"],"related_works":[],"abstract_inverted_index":{"This":[0],"letter":[1],"presents":[2],"an":[3,54,66],"incremental":[4,67,100],"text-to-speech":[5],"(TTS)":[6],"method":[7,69,92,122,130],"that":[8,47,70,120],"performs":[9],"synthesis":[10],"in":[11],"small":[12],"linguistic":[13,111],"units":[14],"while":[15],"maintaining":[16],"the":[17,82,109,114,129,147],"naturalness":[18],"of":[19,53],"output":[20],"speech.":[21],"Incremental":[22],"TTS":[23,68],"is":[24,37],"generally":[25],"subject":[26],"to":[27,39,80,144],"a":[28,44,72,77,98,140],"trade-off":[29],"between":[30],"latency":[31],"and":[32,102,137],"synthetic":[33],"speech":[34,42,126,141],"quality.":[35],"It":[36],"challenging":[38],"produce":[40],"high-quality":[41],"with":[43,76],"low-latency":[45],"setup":[46],"does":[48],"not":[49],"make":[50],"much":[51],"use":[52],"unobserved":[55],"future":[56,83,148],"sentence":[57],"(hereafter,":[58],"\u201clookahead\u201d).":[59],"To":[60],"resolve":[61],"this":[62],"issue,":[63],"we":[64],"propose":[65],"uses":[71,103],"pseudo":[73],"lookahead":[74,115],"generated":[75],"language":[78],"model":[79],"take":[81],"contextual":[84],"information":[85,134],"into":[86,135],"account":[87,136],"without":[88],"increasing":[89],"latency.":[90],"Our":[91],"can":[93],"be":[94],"regarded":[95],"as":[96],"imitating":[97],"human's":[99],"reading":[101],"pretrained":[104],"GPT2,":[105],"which":[106],"accounts":[107],"for":[108,113,146],"large-scale":[110],"knowledge,":[112],"generation.":[116],"Evaluation":[117],"results":[118],"show":[119],"our":[121],"1)":[123],"achieves":[124,139],"higher":[125],"quality":[127,142],"than":[128],"taking":[131],"only":[132],"observed":[133],"2)":[138],"equivalent":[143],"waiting":[145],"context":[149],"observation.":[150]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2021-01-05T00:00:00"}
