{"id":"https://openalex.org/W4405490659","doi":"https://doi.org/10.1109/iccp63557.2024.10793010","title":"Efficient Training Strategies for Natural Sounding Speech Synthesis and Speaker Adaptation Based on Fastpitch","display_name":"Efficient Training Strategies for Natural Sounding Speech Synthesis and Speaker Adaptation Based on Fastpitch","publication_year":2024,"publication_date":"2024-10-17","ids":{"openalex":"https://openalex.org/W4405490659","doi":"https://doi.org/10.1109/iccp63557.2024.10793010"},"language":"en","primary_location":{"id":"doi:10.1109/iccp63557.2024.10793010","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccp63557.2024.10793010","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 20th International Conference on Intelligent Computer Communication and Processing (ICCP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.13985555","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109819145","display_name":"Teodora R\u0103gman","orcid":null},"institutions":[{"id":"https://openalex.org/I158333966","display_name":"Technical University of Cluj-Napoca","ror":"https://ror.org/03r8nwp71","country_code":"RO","type":"education","lineage":["https://openalex.org/I158333966"]}],"countries":["RO"],"is_corresponding":true,"raw_author_name":"Teodora R\u0103gman","raw_affiliation_strings":["Technical University of Cluj-Napoca,Communications Department,Romania"],"affiliations":[{"raw_affiliation_string":"Technical University of Cluj-Napoca,Communications Department,Romania","institution_ids":["https://openalex.org/I158333966"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018126799","display_name":"Adriana Stan","orcid":"https://orcid.org/0000-0003-2894-5770"},"institutions":[{"id":"https://openalex.org/I158333966","display_name":"Technical University of Cluj-Napoca","ror":"https://ror.org/03r8nwp71","country_code":"RO","type":"education","lineage":["https://openalex.org/I158333966"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Adriana Stan","raw_affiliation_strings":["Technical University of Cluj-Napoca,Communications Department,Romania"],"affiliations":[{"raw_affiliation_string":"Technical University of Cluj-Napoca,Communications Department,Romania","institution_ids":["https://openalex.org/I158333966"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5109819145"],"corresponding_institution_ids":["https://openalex.org/I158333966"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2145373,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9406999945640564,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9406999945640564,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7018833160400391},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.627259373664856},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5858957171440125},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5574688911437988},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.49487966299057007},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.46640291810035706},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.09097060561180115},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.05868580937385559}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7018833160400391},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.627259373664856},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5858957171440125},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5574688911437988},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.49487966299057007},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.46640291810035706},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09097060561180115},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.05868580937385559},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/iccp63557.2024.10793010","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccp63557.2024.10793010","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 20th International Conference on Intelligent Computer Communication and Processing (ICCP)","raw_type":"proceedings-article"},{"id":"doi:10.5281/zenodo.13985555","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.13985555","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.13985555","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.13985555","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2519091744","https://openalex.org/W2738884019","https://openalex.org/W3150572638","https://openalex.org/W4293199560","https://openalex.org/W4296069300","https://openalex.org/W4297841465","https://openalex.org/W4319862723","https://openalex.org/W4385822310","https://openalex.org/W4392931276","https://openalex.org/W4402112474","https://openalex.org/W4402118932","https://openalex.org/W6734815144","https://openalex.org/W6739369274"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W3216976533","https://openalex.org/W100620283","https://openalex.org/W2495260952","https://openalex.org/W4366179611","https://openalex.org/W2996078371"],"abstract_inverted_index":{"This":[0],"paper":[1],"focuses":[2],"on":[3,62,67],"adapting":[4],"the":[5,8,12,16,32,41,70,84,111,116,134],"functionalities":[6],"of":[7,18,34,43,69,74,128],"FastPitch":[9,71],"model":[10],"to":[11,22,108],"Romanian":[13],"language;":[14],"extending":[15],"set":[17],"speakers":[19],"from":[20,83],"one":[21,106],"eighteen;":[23],"synthesising":[24],"speech":[25,77],"using":[26],"an":[27],"anonymous":[28,97],"identity;":[29],"and":[30,46,51,57,87,120,139],"replicating":[31],"identities":[33],"new,":[35],"unseen":[36],"speakers.":[37,95],"During":[38],"this":[39],"work,":[40,130],"effects":[42],"various":[44],"configurations":[45],"training":[47,85],"strategies":[48],"were":[49],"tested":[50],"discussed,":[52],"along":[53],"with":[54],"their":[55],"advantages":[56],"weaknesses.":[58],"Finally,":[59],"we":[60,124],"settled":[61],"a":[63],"new":[64],"configuration,":[65],"built":[66],"top":[68],"architecture,":[72],"capable":[73],"producing":[75],"natural":[76],"synthesis,":[78,104],"for":[79,102,136],"both":[80],"known":[81],"(identities":[82,89],"dataset)":[86],"unknown":[88],"learnt":[90],"through":[91],"short":[92],"reference":[93],"samples)":[94],"The":[96],"speaker":[98],"can":[99],"be":[100],"used":[101],"text-to-speech":[103],"if":[105],"wants":[107],"cancel":[109],"out":[110],"identity":[112],"information":[113],"while":[114],"keeping":[115],"semantic":[117],"content":[118],"whole":[119],"clear.":[121],"At":[122],"last,":[123],"discussed":[125],"possible":[126],"limitations":[127],"our":[129],"which":[131],"will":[132],"form":[133],"basis":[135],"future":[137],"investigations":[138],"advancements.":[140]},"counts_by_year":[],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
