{"id":"https://openalex.org/W3048023795","doi":"https://doi.org/10.21437/interspeech.2020-1822","title":"Incremental Text to Speech for Neural Sequence-to-Sequence Models Using Reinforcement Learning","display_name":"Incremental Text to Speech for Neural Sequence-to-Sequence Models Using Reinforcement Learning","publication_year":2020,"publication_date":"2020-10-25","ids":{"openalex":"https://openalex.org/W3048023795","doi":"https://doi.org/10.21437/interspeech.2020-1822","mag":"3048023795"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2020-1822","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1822","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2008.03096","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024033876","display_name":"Devang S Ram Mohan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Devang S. Ram Mohan","raw_affiliation_strings":["Papercup Technologies Ltd.,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Papercup Technologies Ltd.,","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038524461","display_name":"Raphael Lenain","orcid":"https://orcid.org/0000-0001-5345-0628"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raphael Lenain","raw_affiliation_strings":["Novoic"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Novoic","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068657700","display_name":"Lorenzo Foglianti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lorenzo Foglianti","raw_affiliation_strings":["Papercup Technologies Ltd.,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Papercup Technologies Ltd.,","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038517086","display_name":"Tian Huey Teh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian Huey Teh","raw_affiliation_strings":["Papercup Technologies Ltd.,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Papercup Technologies Ltd.,","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008306627","display_name":"Marlene Staib","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marlene Staib","raw_affiliation_strings":["Papercup Technologies Ltd.,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Papercup Technologies Ltd.,","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083188961","display_name":"Alexandra Torresquintero","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alexandra Torresquintero","raw_affiliation_strings":["Papercup Technologies Ltd.,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Papercup Technologies Ltd.,","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007377741","display_name":"Jiameng Gao","orcid":"https://orcid.org/0000-0003-4161-938X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiameng Gao","raw_affiliation_strings":["Papercup Technologies Ltd.,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Papercup Technologies Ltd.,","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3541,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.85397542,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3186","last_page":"3190"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8075008392333984},{"id":"https://openalex.org/keywords/interleaving","display_name":"Interleaving","score":0.8005900382995605},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7228673696517944},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6631616950035095},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6469206213951111},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6106237173080444},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46157559752464294},{"id":"https://openalex.org/keywords/sequence-learning","display_name":"Sequence learning","score":0.45772892236709595},{"id":"https://openalex.org/keywords/low-latency","display_name":"Low latency (capital markets)","score":0.4268646836280823},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.4261434078216553},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3922811448574066}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8075008392333984},{"id":"https://openalex.org/C28034677","wikidata":"https://www.wikidata.org/wiki/Q17092530","display_name":"Interleaving","level":2,"score":0.8005900382995605},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7228673696517944},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6631616950035095},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6469206213951111},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6106237173080444},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46157559752464294},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.45772892236709595},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.4268646836280823},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.4261434078216553},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3922811448574066},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2020-1822","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1822","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2008.03096","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2008.03096","pdf_url":"https://arxiv.org/pdf/2008.03096","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2008.03096","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2008.03096","pdf_url":"https://arxiv.org/pdf/2008.03096","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.75}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W1515851193","https://openalex.org/W1522301498","https://openalex.org/W1828163288","https://openalex.org/W1941338968","https://openalex.org/W1975163393","https://openalex.org/W2064675550","https://openalex.org/W2119717200","https://openalex.org/W2122262818","https://openalex.org/W2125308790","https://openalex.org/W2131774270","https://openalex.org/W2142813692","https://openalex.org/W2204302769","https://openalex.org/W2419292002","https://openalex.org/W2491408735","https://openalex.org/W2529548870","https://openalex.org/W2605141709","https://openalex.org/W2607084763","https://openalex.org/W2746192915","https://openalex.org/W2750347728","https://openalex.org/W2758584285","https://openalex.org/W2767601419","https://openalex.org/W2798685342","https://openalex.org/W2896234185","https://openalex.org/W2901997113","https://openalex.org/W2948211236","https://openalex.org/W2951562371","https://openalex.org/W2952264928","https://openalex.org/W2952992734","https://openalex.org/W2962760690","https://openalex.org/W2962826786","https://openalex.org/W2963312585","https://openalex.org/W2964243274","https://openalex.org/W2964272710","https://openalex.org/W2964307104","https://openalex.org/W2970074184","https://openalex.org/W2972702018","https://openalex.org/W2982174878","https://openalex.org/W2985856318","https://openalex.org/W3015922793","https://openalex.org/W3016010032","https://openalex.org/W3104081910","https://openalex.org/W4230563027","https://openalex.org/W4294619417","https://openalex.org/W4298580827","https://openalex.org/W4300003935","https://openalex.org/W4318717450"],"related_works":["https://openalex.org/W3205411230","https://openalex.org/W4286899009","https://openalex.org/W3163341049","https://openalex.org/W9168048","https://openalex.org/W4300849822","https://openalex.org/W4376480820","https://openalex.org/W3155891479","https://openalex.org/W3029351463","https://openalex.org/W4296591952","https://openalex.org/W4308600690"],"abstract_inverted_index":{"Modern":[0],"approaches":[1],"to":[2,4,12,79,83,130],"text":[3],"speech":[5],"require":[6],"the":[7,23,35,50,63,67,105,108,114],"entire":[8],"input":[9],"character":[10,40],"sequence":[11,54],"be":[13,70,128],"processed":[14],"before":[15],"any":[16],"audio":[17,45,111],"is":[18],"synthesised.":[19],"This":[20],"latency":[21,109],"limits":[22],"suitability":[24],"of":[25,37,43,52,55,65,93,110,116],"such":[26],"models":[27,126],"for":[28],"time-sensitive":[29],"tasks":[30],"like":[31],"simultaneous":[32],"interpretation.":[33],"Interleaving":[34],"action":[36],"reading":[38],"a":[39,74],"with":[41],"that":[42,92,100,123],"synthesising":[44],"reduces":[46],"this":[47,53,85],"latency.":[48],"However,":[49],"order":[51],"interleaved":[56],"actions":[57,68],"varies":[58],"across":[59],"sentences,":[60],"which":[61],"raises":[62],"question":[64],"how":[66],"should":[69],"chosen.":[71],"We":[72,87],"propose":[73],"reinforcement":[75],"learning":[76],"based":[77],"framework":[78],"train":[80],"an":[81,133],"agent":[82,102],"make":[84],"decision.":[86],"compare":[88],"our":[89,101],"performance":[90],"against":[91],"deterministic,":[94],"rule-based":[95],"systems.":[96],"Our":[97],"results":[98],"demonstrate":[99],"successfully":[103],"balances":[104],"trade-off":[106],"between":[107],"generation":[112],"and":[113],"quality":[115],"synthesised":[117],"audio.":[118],"More":[119],"broadly,":[120],"we":[121],"show":[122],"neural":[124],"sequence-to-sequence":[125],"can":[127],"adapted":[129],"run":[131],"in":[132],"incremental":[134],"manner.":[135]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":6}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
