{"id":"https://openalex.org/W4388118103","doi":"https://doi.org/10.23919/eusipco58844.2023.10289912","title":"Low-Resource Text-to-Speech Using Specific Data and Noise Augmentation","display_name":"Low-Resource Text-to-Speech Using Specific Data and Noise Augmentation","publication_year":2023,"publication_date":"2023-09-04","ids":{"openalex":"https://openalex.org/W4388118103","doi":"https://doi.org/10.23919/eusipco58844.2023.10289912"},"language":"en","primary_location":{"id":"doi:10.23919/eusipco58844.2023.10289912","is_oa":false,"landing_page_url":"https://doi.org/10.23919/eusipco58844.2023.10289912","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 31st European Signal Processing Conference (EUSIPCO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071760559","display_name":"Kishor Kayyar Lakshminarayana","orcid":"https://orcid.org/0000-0001-7493-818X"},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Kishor Kayyar Lakshminarayana","raw_affiliation_strings":["Fraunhofer Institute for Integrated Circuits (IIS),Erlangen,Germany","Fraunhofer Institute for Integrated Circuits (IIS), Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer Institute for Integrated Circuits (IIS),Erlangen,Germany","institution_ids":["https://openalex.org/I4210124274"]},{"raw_affiliation_string":"Fraunhofer Institute for Integrated Circuits (IIS), Erlangen, Germany","institution_ids":["https://openalex.org/I4210124274"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009881719","display_name":"Christian Dittmar","orcid":"https://orcid.org/0000-0002-3220-2446"},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christian Dittmar","raw_affiliation_strings":["Fraunhofer Institute for Integrated Circuits (IIS),Erlangen,Germany","Fraunhofer Institute for Integrated Circuits (IIS), Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer Institute for Integrated Circuits (IIS),Erlangen,Germany","institution_ids":["https://openalex.org/I4210124274"]},{"raw_affiliation_string":"Fraunhofer Institute for Integrated Circuits (IIS), Erlangen, Germany","institution_ids":["https://openalex.org/I4210124274"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039716073","display_name":"Nicola Pia","orcid":"https://orcid.org/0000-0003-0987-863X"},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Nicola Pia","raw_affiliation_strings":["Fraunhofer Institute for Integrated Circuits (IIS),Erlangen,Germany","Fraunhofer Institute for Integrated Circuits (IIS), Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer Institute for Integrated Circuits (IIS),Erlangen,Germany","institution_ids":["https://openalex.org/I4210124274"]},{"raw_affiliation_string":"Fraunhofer Institute for Integrated Circuits (IIS), Erlangen, Germany","institution_ids":["https://openalex.org/I4210124274"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031626756","display_name":"Emanu\u00ebl A. P. Habets","orcid":"https://orcid.org/0000-0002-2613-8046"},"institutions":[{"id":"https://openalex.org/I4210123192","display_name":"International Audio Laboratories Erlangen","ror":"https://ror.org/02mkz3e80","country_code":"DE","type":"facility","lineage":["https://openalex.org/I181369854","https://openalex.org/I4210123192","https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Emanu\u00ebl Habets","raw_affiliation_strings":["International Audio Laboratories,Erlangen,Germany","International Audio Laboratories, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"International Audio Laboratories,Erlangen,Germany","institution_ids":["https://openalex.org/I4210123192"]},{"raw_affiliation_string":"International Audio Laboratories, Erlangen, Germany","institution_ids":["https://openalex.org/I4210123192"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5071760559"],"corresponding_institution_ids":["https://openalex.org/I4210124274"],"apc_list":null,"apc_paid":null,"fwci":0.5245,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.72171901,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"61","last_page":"65"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8511287569999695},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6212655901908875},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5987544655799866},{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.5847605466842651},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5267189145088196},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4891493618488312},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.47836655378341675},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.45100298523902893},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3965272307395935},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3678976893424988}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8511287569999695},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6212655901908875},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5987544655799866},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.5847605466842651},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5267189145088196},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4891493618488312},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.47836655378341675},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.45100298523902893},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3965272307395935},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3678976893424988},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.23919/eusipco58844.2023.10289912","is_oa":false,"landing_page_url":"https://doi.org/10.23919/eusipco58844.2023.10289912","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 31st European Signal Processing Conference (EUSIPCO)","raw_type":"proceedings-article"},{"id":"pmh:oai:publica.fraunhofer.de:publica/468969","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/468969","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference paper"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8100000023841858,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W70888257","https://openalex.org/W95152782","https://openalex.org/W186914350","https://openalex.org/W2084609288","https://openalex.org/W2619368999","https://openalex.org/W2791686384","https://openalex.org/W2808706139","https://openalex.org/W2889028433","https://openalex.org/W2936774411","https://openalex.org/W2964243274","https://openalex.org/W2969658393","https://openalex.org/W2972668418","https://openalex.org/W2972802841","https://openalex.org/W3022876224","https://openalex.org/W3080626558","https://openalex.org/W3081416955","https://openalex.org/W3097290232","https://openalex.org/W3161236344","https://openalex.org/W3161492781","https://openalex.org/W3161890269","https://openalex.org/W3167533889","https://openalex.org/W3172021299","https://openalex.org/W3195171908","https://openalex.org/W3195366621","https://openalex.org/W4225943493","https://openalex.org/W4231807801","https://openalex.org/W4283777098","https://openalex.org/W6603838645","https://openalex.org/W6607631539","https://openalex.org/W6758366080","https://openalex.org/W6765987481","https://openalex.org/W6768092425","https://openalex.org/W6796941875"],"related_works":["https://openalex.org/W2081919107","https://openalex.org/W3115948027","https://openalex.org/W2405110677","https://openalex.org/W2811496854","https://openalex.org/W209733029","https://openalex.org/W2891480213","https://openalex.org/W3118953353","https://openalex.org/W2158542502","https://openalex.org/W1997978958","https://openalex.org/W133774893"],"abstract_inverted_index":{"Many":[0],"neural":[1],"text-to-speech":[2],"architectures":[3,13],"can":[4],"synthesize":[5],"nearly":[6],"natural":[7],"speech":[8,26],"from":[9],"text":[10],"inputs.":[11],"These":[12],"must":[14],"be":[15,123],"trained":[16,130],"with":[17,59,126,131,143],"tens":[18],"of":[19,21,39,70,112,134],"hours":[20,111,133],"annotated":[22],"and":[23,41,56,98,106],"high-":[24],"quality":[25],"data.":[27,73,136],"Compiling":[28],"such":[29],"large":[30],"databases":[31],"for":[32,89],"every":[33],"new":[34],"voice":[35],"requires":[36],"a":[37,48,67,144],"lot":[38],"time":[40],"effort.":[42],"In":[43,74,137],"this":[44],"paper,":[45],"we":[46,84,139],"describe":[47],"method":[49],"to":[50,62,76,96,122],"extend":[51],"the":[52,82,127],"popular":[53],"Tacotron-2":[54,129],"architecture":[55],"its":[57],"training":[58,72,105,113],"data":[60,90],"augmentation":[61,78],"enable":[63],"single-speaker":[64],"synthesis":[65],"using":[66],"limited":[68],"amount":[69],"specific":[71],"contrast":[75],"elaborate":[77],"methods":[79],"proposed":[80],"in":[81],"literature,":[83],"use":[85],"simple":[86],"stationary":[87],"noises":[88],"augmentation.":[91],"Our":[92],"extension":[93],"is":[94],"easy":[95],"implement":[97],"adds":[99],"almost":[100],"no":[101],"computational":[102],"overhead":[103],"during":[104],"inference.":[107],"Using":[108],"only":[109],"two":[110],"data,":[114],"our":[115,141],"approach":[116],"was":[117],"rated":[118],"by":[119],"human":[120],"listeners":[121],"on":[124],"par":[125],"baseline":[128],"23.5":[132],"LJSpeech":[135],"addition,":[138],"tested":[140],"model":[142],"semantically":[145],"unpredictable":[146],"sentences":[147],"test,":[148],"which":[149],"showed":[150],"that":[151],"both":[152],"models":[153],"exhibit":[154],"similar":[155],"intelligibility":[156],"levels.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
