{"id":"https://openalex.org/W3011193554","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023277","title":"Many-to-many Cross-lingual Voice Conversion with a Jointly Trained Speaker Embedding Network","display_name":"Many-to-many Cross-lingual Voice Conversion with a Jointly Trained Speaker Embedding Network","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W3011193554","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023277","mag":"3011193554"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc47483.2019.9023277","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023277","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101846956","display_name":"Yi Zhou","orcid":"https://orcid.org/0000-0002-8520-8227"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Yi Zhou","raw_affiliation_strings":["National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103156833","display_name":"Xiaohai Tian","orcid":"https://orcid.org/0000-0001-5219-1249"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xiaohai Tian","raw_affiliation_strings":["National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029637694","display_name":"Rohan Kumar Das","orcid":"https://orcid.org/0000-0002-1332-3357"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Rohan Kumar Das","raw_affiliation_strings":["National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101846956"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":1.2601,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.85682729,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"86","issue":null,"first_page":"1282","last_page":"1287"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8199145793914795},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6797887086868286},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6654716730117798},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5693918466567993},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5183268785476685},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5022661685943604},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.48708391189575195},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4517313241958618},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.44965317845344543},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.42626234889030457},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.424924373626709}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8199145793914795},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6797887086868286},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6654716730117798},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5693918466567993},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5183268785476685},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5022661685943604},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.48708391189575195},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4517313241958618},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.44965317845344543},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.42626234889030457},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.424924373626709},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc47483.2019.9023277","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023277","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6499999761581421}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W59075858","https://openalex.org/W95152782","https://openalex.org/W799143314","https://openalex.org/W1524333225","https://openalex.org/W1535240601","https://openalex.org/W1562777581","https://openalex.org/W1602430027","https://openalex.org/W2013996527","https://openalex.org/W2086796102","https://openalex.org/W2093632031","https://openalex.org/W2111550316","https://openalex.org/W2118850452","https://openalex.org/W2123808477","https://openalex.org/W2130290868","https://openalex.org/W2150769028","https://openalex.org/W2154920538","https://openalex.org/W2156142001","https://openalex.org/W2156477760","https://openalex.org/W2161476805","https://openalex.org/W2398071208","https://openalex.org/W2404444716","https://openalex.org/W2405464168","https://openalex.org/W2471520273","https://openalex.org/W2473388484","https://openalex.org/W2507912506","https://openalex.org/W2518172956","https://openalex.org/W2574092538","https://openalex.org/W2591794820","https://openalex.org/W2598638573","https://openalex.org/W2608338293","https://openalex.org/W2619368999","https://openalex.org/W2785608393","https://openalex.org/W2785978752","https://openalex.org/W2806000759","https://openalex.org/W2807668517","https://openalex.org/W2889064624","https://openalex.org/W2892734764","https://openalex.org/W2898832121","https://openalex.org/W2911340057","https://openalex.org/W2924115626","https://openalex.org/W2938583109","https://openalex.org/W2941094131","https://openalex.org/W2962896155","https://openalex.org/W2963035245","https://openalex.org/W2964195110","https://openalex.org/W2972999331","https://openalex.org/W4251158933","https://openalex.org/W6602386084","https://openalex.org/W6603838645","https://openalex.org/W6631362777","https://openalex.org/W6636404455","https://openalex.org/W6713416282","https://openalex.org/W6713491369","https://openalex.org/W6726528559","https://openalex.org/W6748118087","https://openalex.org/W6765987481","https://openalex.org/W6936113694"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2149220986","https://openalex.org/W1493012537","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W1999004162","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2023466863","https://openalex.org/W2696990509"],"abstract_inverted_index":{"Among":[0],"various":[1],"voice":[2,69],"conversion":[3,70],"(VC)":[4],"techniques,":[5],"average":[6,35,74],"modeling":[7,36],"approach":[8],"has":[9],"achieved":[10],"good":[11],"performance":[12],"as":[13],"it":[14,61],"benefits":[15],"from":[16,29],"training":[17,27],"data":[18,28],"of":[19,42,102,115],"multiple":[20],"speakers,":[21],"therefore,":[22],"reducing":[23],"the":[24,30,40,46,67,73,91,100,103,116,124,128,133,139,144],"reliance":[25],"on":[26,39],"target":[31],"speaker.":[32],"Many":[33],"existing":[34],"approaches":[37],"rely":[38],"use":[41],"i-vector":[43,54,125],"to":[44,65],"represent":[45],"speaker":[47,86,129,148],"identity":[48],"for":[49,72,95],"model":[50],"adaptation.":[51],"As":[52],"such":[53],"is":[55,62,113,136],"extracted":[56],"in":[57,120,132],"a":[58,82,108],"separate":[59],"process,":[60],"not":[63],"optimized":[64],"achieve":[66],"best":[68],"quality":[71,146],"model.":[75],"To":[76],"address":[77],"this":[78],"problem,":[79],"we":[80],"propose":[81],"low":[83],"dimensional":[84],"trainable":[85],"embedding":[87,130],"network":[88,94,131],"that":[89,138],"augments":[90],"primary":[92],"VC":[93],"joint":[96],"training.":[97],"We":[98,122],"validate":[99],"effectiveness":[101],"proposed":[104,140],"idea":[105],"by":[106],"performing":[107],"many-to-many":[109],"cross-lingual":[110],"VC,":[111],"which":[112],"one":[114],"most":[117],"challenging":[118],"tasks":[119],"VC.":[121],"compare":[123],"scheme":[126],"with":[127],"experiments.":[134],"It":[135],"found":[137],"system":[141],"effectively":[142],"improves":[143],"speech":[145],"and":[147],"similarity.":[149]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
