{"id":"https://openalex.org/W4384613952","doi":"https://doi.org/10.48550/arxiv.2307.07062","title":"Controllable Emphasis with zero data for text-to-speech","display_name":"Controllable Emphasis with zero data for text-to-speech","publication_year":2023,"publication_date":"2023-07-13","ids":{"openalex":"https://openalex.org/W4384613952","doi":"https://doi.org/10.48550/arxiv.2307.07062"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2307.07062","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.07062","pdf_url":"https://arxiv.org/pdf/2307.07062","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2307.07062","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083043476","display_name":"Arnaud Joly","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Joly, Arnaud","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054742272","display_name":"Marco Nicolis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nicolis, Marco","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092489695","display_name":"Ekaterina Peterova","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peterova, Ekaterina","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031191479","display_name":"Alessandro Lombardi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lombardi, Alessandro","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081086162","display_name":"Ammar Abbas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abbas, Ammar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004739281","display_name":"Arent van Korlaar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"van Korlaar, Arent","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026885120","display_name":"Aman Hussain","orcid":"https://orcid.org/0000-0001-7577-6645"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hussain, Aman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365417","display_name":"Parul Sharma","orcid":"https://orcid.org/0009-0007-4989-0381"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sharma, Parul","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014483986","display_name":"Alexis Moinet","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moinet, Alexis","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032468476","display_name":"Mateusz \u0141ajszczak","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lajszczak, Mateusz","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006537074","display_name":"Penny Karanasou","orcid":"https://orcid.org/0000-0003-1939-4161"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karanasou, Penny","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000405202","display_name":"Antonio Bonafonte","orcid":"https://orcid.org/0000-0002-6240-9915"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bonafonte, Antonio","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029023520","display_name":"Thomas Drugman","orcid":"https://orcid.org/0000-0002-1491-7878"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Drugman, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5023965740","display_name":"Elena Sokolova","orcid":"https://orcid.org/0000-0002-2001-8772"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sokolova, Elena","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5083043476"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9793000221252441,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9757000207901001,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.9083918333053589},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7688425779342651},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7583248615264893},{"id":"https://openalex.org/keywords/emphasis","display_name":"Emphasis (telecommunications)","score":0.6781271696090698},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.644321084022522},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5631062984466553},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5409039855003357},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.5286886096000671},{"id":"https://openalex.org/keywords/german","display_name":"German","score":0.5128293633460999},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.4951891005039215},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4868725836277008},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4733830988407135},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40130555629730225},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.26784950494766235}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.9083918333053589},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7688425779342651},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7583248615264893},{"id":"https://openalex.org/C177454536","wikidata":"https://www.wikidata.org/wiki/Q578290","display_name":"Emphasis (telecommunications)","level":2,"score":0.6781271696090698},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.644321084022522},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5631062984466553},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5409039855003357},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.5286886096000671},{"id":"https://openalex.org/C154775046","wikidata":"https://www.wikidata.org/wiki/Q188","display_name":"German","level":2,"score":0.5128293633460999},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.4951891005039215},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4868725836277008},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4733830988407135},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40130555629730225},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.26784950494766235},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2307.07062","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.07062","pdf_url":"https://arxiv.org/pdf/2307.07062","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2307.07062","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2307.07062","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2307.07062","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.07062","pdf_url":"https://arxiv.org/pdf/2307.07062","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Gender equality","score":0.5799999833106995,"id":"https://metadata.un.org/sdg/5"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4384613952.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W2108985546","https://openalex.org/W2081919107","https://openalex.org/W2433276473","https://openalex.org/W1537411440","https://openalex.org/W1984347656","https://openalex.org/W2535215250","https://openalex.org/W2024201202","https://openalex.org/W2049083033"],"abstract_inverted_index":{"We":[0,47,81],"present":[1],"a":[2,24,71,76],"scalable":[3,101],"method":[4,32,97],"to":[5,33,90,99],"produce":[6],"high":[7],"quality":[8],"emphasis":[9],"for":[10,113],"text-to-speech":[11],"(TTS)":[12],"that":[13,49,83,92],"does":[14],"not":[15],"require":[16,93],"recordings":[17],"or":[18],"annotations.":[19],"Many":[20],"TTS":[21],"models":[22],"include":[23],"phoneme":[25],"duration":[26,42],"model.":[27],"A":[28],"simple":[29],"but":[30],"effective":[31],"achieve":[34],"emphasized":[35,68],"speech":[36],"consists":[37],"in":[38,70,104],"increasing":[39],"the":[40,44,67,88],"predicted":[41],"of":[43,66],"emphasised":[45],"word.":[46],"show":[48,82],"this":[50,84],"is":[51],"significantly":[52,86],"better":[53],"than":[54],"spectrogram":[55],"modification":[56],"techniques":[57],"improving":[58],"naturalness":[59],"by":[60,73],"$7.3\\%$":[61],"and":[62,102,116],"correct":[63],"testers'":[64],"identification":[65],"word":[69],"sentence":[72],"$40\\%$":[74],"on":[75],"reference":[77],"female":[78],"en-US":[79],"voice.":[80],"technique":[85],"closes":[87],"gap":[89],"methods":[91],"explicit":[94],"recordings.":[95],"The":[96],"proved":[98],"be":[100],"preferred":[103],"all":[105],"four":[106],"languages":[107],"tested":[108],"(English,":[109],"Spanish,":[110],"Italian,":[111],"German),":[112],"different":[114],"voices":[115],"multiple":[117],"speaking":[118],"styles.":[119]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
