{"id":"https://openalex.org/W4414140760","doi":"https://doi.org/10.1109/iwssip66997.2025.11151916","title":"Exploiting voice conversion in creating new TTS voices","display_name":"Exploiting voice conversion in creating new TTS voices","publication_year":2025,"publication_date":"2025-06-24","ids":{"openalex":"https://openalex.org/W4414140760","doi":"https://doi.org/10.1109/iwssip66997.2025.11151916"},"language":"en","primary_location":{"id":"doi:10.1109/iwssip66997.2025.11151916","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwssip66997.2025.11151916","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 32nd International Conference on Systems, Signals and Image Processing (IWSSIP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055197467","display_name":"Tijana Nosek","orcid":"https://orcid.org/0000-0002-3707-0286"},"institutions":[{"id":"https://openalex.org/I170726198","display_name":"University of Novi Sad","ror":"https://ror.org/00xa57a59","country_code":"RS","type":"education","lineage":["https://openalex.org/I170726198"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Tijana Nosek","raw_affiliation_strings":["Faculty of Technical Sciences, University of Novi Sad,Novi Sad,Serbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Technical Sciences, University of Novi Sad,Novi Sad,Serbia","institution_ids":["https://openalex.org/I170726198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053625267","display_name":"Sini\u0161a Suzi\u0107","orcid":"https://orcid.org/0000-0002-0511-6729"},"institutions":[{"id":"https://openalex.org/I170726198","display_name":"University of Novi Sad","ror":"https://ror.org/00xa57a59","country_code":"RS","type":"education","lineage":["https://openalex.org/I170726198"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Sini\u0161a Suzi\u0107","raw_affiliation_strings":["Faculty of Technical Sciences, University of Novi Sad,Novi Sad,Serbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Technical Sciences, University of Novi Sad,Novi Sad,Serbia","institution_ids":["https://openalex.org/I170726198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012695218","display_name":"Nikola Simi\u0107","orcid":"https://orcid.org/0000-0002-0748-4672"},"institutions":[{"id":"https://openalex.org/I170726198","display_name":"University of Novi Sad","ror":"https://ror.org/00xa57a59","country_code":"RS","type":"education","lineage":["https://openalex.org/I170726198"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Nikola Simi\u0107","raw_affiliation_strings":["Faculty of Technical Sciences, University of Novi Sad,Novi Sad,Serbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Technical Sciences, University of Novi Sad,Novi Sad,Serbia","institution_ids":["https://openalex.org/I170726198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085575169","display_name":"Milan Se\u010dujski","orcid":"https://orcid.org/0000-0002-3426-3277"},"institutions":[{"id":"https://openalex.org/I170726198","display_name":"University of Novi Sad","ror":"https://ror.org/00xa57a59","country_code":"RS","type":"education","lineage":["https://openalex.org/I170726198"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Milan Se\u010dujski","raw_affiliation_strings":["Faculty of Technical Sciences, University of Novi Sad,Novi Sad,Serbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Technical Sciences, University of Novi Sad,Novi Sad,Serbia","institution_ids":["https://openalex.org/I170726198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110585076","display_name":"Darko Pekar","orcid":"https://orcid.org/0000-0003-3026-8086"},"institutions":[{"id":"https://openalex.org/I170726198","display_name":"University of Novi Sad","ror":"https://ror.org/00xa57a59","country_code":"RS","type":"education","lineage":["https://openalex.org/I170726198"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Darko Pekar","raw_affiliation_strings":["AlfaNum Ltd.,Novi Sad,Serbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AlfaNum Ltd.,Novi Sad,Serbia","institution_ids":["https://openalex.org/I170726198"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074165790","display_name":"Vlado Deli\u0107","orcid":"https://orcid.org/0000-0002-4558-9918"},"institutions":[{"id":"https://openalex.org/I170726198","display_name":"University of Novi Sad","ror":"https://ror.org/00xa57a59","country_code":"RS","type":"education","lineage":["https://openalex.org/I170726198"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Vlado Deli\u0107","raw_affiliation_strings":["Faculty of Technical Sciences, University of Novi Sad,Novi Sad,Serbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Technical Sciences, University of Novi Sad,Novi Sad,Serbia","institution_ids":["https://openalex.org/I170726198"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I170726198"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11067872,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.9049999713897705},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6736000180244446},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5637000203132629},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.5594000220298767},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.5162000060081482},{"id":"https://openalex.org/keywords/speech-technology","display_name":"Speech technology","score":0.5123000144958496},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43540000915527344}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.9049999713897705},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7494999766349792},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7389000058174133},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6736000180244446},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5637000203132629},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.5594000220298767},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.5162000060081482},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.5123000144958496},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43540000915527344},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.423799991607666},{"id":"https://openalex.org/C182964821","wikidata":"https://www.wikidata.org/wiki/Q7939498","display_name":"Voice analysis","level":2,"score":0.3765000104904175},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.336899995803833},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32359999418258667},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C137584468","wikidata":"https://www.wikidata.org/wiki/Q35395","display_name":"Phonetics","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.28299999237060547},{"id":"https://openalex.org/C173988693","wikidata":"https://www.wikidata.org/wiki/Q678132","display_name":"Phonation","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.2572000026702881}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iwssip66997.2025.11151916","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwssip66997.2025.11151916","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 32nd International Conference on Systems, Signals and Image Processing (IWSSIP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320329307","display_name":"Science Fund of the Republic of Serbia","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2767132343","https://openalex.org/W2798665391","https://openalex.org/W2962866891","https://openalex.org/W2964243274","https://openalex.org/W3032080156","https://openalex.org/W3092028330","https://openalex.org/W3209059054","https://openalex.org/W3213544594","https://openalex.org/W4313035852","https://openalex.org/W4313679638","https://openalex.org/W4320459320","https://openalex.org/W4380714711","https://openalex.org/W4389115350","https://openalex.org/W4392426139","https://openalex.org/W4404590048"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0],"development":[1],"of":[2,45,57,93,109,129,144,166],"high-quality":[3],"Serbian":[4],"text-to-speech":[5],"(TTS)":[6],"systems":[7,175],"for":[8,65,169,176],"new":[9],"speakers":[10],"traditionally":[11],"requires":[12],"extensive":[13],"phonetic":[14],"and":[15,24,53,84,88,106,118],"prosodic":[16],"annotations,":[17],"a":[18,29,46],"process":[19],"that":[20,32,123],"is":[21],"both":[22],"time-consuming":[23],"resource-intensive.":[25],"This":[26],"paper":[27],"explores":[28],"novel":[30],"approach":[31],"leverages":[33],"voice":[34,44],"conversion":[35,99],"(RVC)":[36],"techniques":[37],"to":[38,149],"generate":[39],"synthetic":[40],"speech":[41,82,95,116],"in":[42,173],"the":[43,58,142,151,164],"target":[47,66,110,145],"speaker.":[48],"In":[49],"this":[50],"scenario":[51],"phonetically":[52],"prosodically":[54],"annotated":[55],"transcriptions":[56],"source":[59],"speaker":[60,146,171],"could":[61],"also":[62,104],"be":[63],"re-used":[64],"speaker,":[67],"i.e.":[68],"RVC":[69,98,152,168],"synthesized":[70],"speech,":[71],"as":[72],"well.":[73],"Four":[74],"models":[75,100],"were":[76],"evaluated:":[77],"two":[78,89],"adapted":[79,90],"with":[80,91,136],"natural":[81,139],"(30":[83],"3":[85,107],"minutes,":[86],"respectively),":[87],"2.5h":[92],"RVC-generated":[94,124],"based":[96],"where":[97],"are":[101],"trained":[102],"using":[103,167],"30":[105],"minutes":[108],"speakers\u2019":[111],"speech.":[112],"Listening":[113],"tests":[114],"assessed":[115],"naturalness":[117],"vocal":[119],"similarity.":[120],"Results":[121],"indicate":[122],"data":[125,147],"enables":[126],"effective":[127],"adaptation":[128,135,172],"multispeaker":[130],"TTS":[131,174],"models,":[132],"outperforming":[133],"direct":[134],"very":[137],"limited":[138],"data.":[140],"Moreover,":[141],"amount":[143],"used":[148],"train":[150],"model":[153],"had":[154],"minimal":[155],"impact":[156],"on":[157],"final":[158],"synthesis":[159],"quality.":[160],"These":[161],"findings":[162],"highlight":[163],"potential":[165],"low-resource":[170],"Serbian.":[177]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
