{"id":"https://openalex.org/W2922283382","doi":"https://doi.org/10.23919/apsipa.2018.8659628","title":"Many-to-Many Voice Conversion based on Bottleneck Features with Variational Autoencoder for Non-parallel Training Data","display_name":"Many-to-Many Voice Conversion based on Bottleneck Features with Variational Autoencoder for Non-parallel Training Data","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2922283382","doi":"https://doi.org/10.23919/apsipa.2018.8659628","mag":"2922283382"},"language":"en","primary_location":{"id":"doi:10.23919/apsipa.2018.8659628","is_oa":false,"landing_page_url":"https://doi.org/10.23919/apsipa.2018.8659628","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100385632","display_name":"Yanping Li","orcid":"https://orcid.org/0000-0002-0074-7695"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yanping Li","raw_affiliation_strings":["College of Telecommunication & Information Engineering, Nanjing University of Posts and Telecommunications, China"],"affiliations":[{"raw_affiliation_string":"College of Telecommunication & Information Engineering, Nanjing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I41198531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004287909","display_name":"Kong Aik Lee","orcid":"https://orcid.org/0000-0001-9133-3000"},"institutions":[{"id":"https://openalex.org/I118347220","display_name":"NEC (Japan)","ror":"https://ror.org/04jndar25","country_code":"JP","type":"company","lineage":["https://openalex.org/I118347220"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kong Aik Lee","raw_affiliation_strings":["Data Science Research Laboratories, NEC Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"Data Science Research Laboratories, NEC Corporation, Japan","institution_ids":["https://openalex.org/I118347220"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017808406","display_name":"Yougen Yuan","orcid":"https://orcid.org/0000-0002-2490-566X"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yougen Yuan","raw_affiliation_strings":["Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["Department of Electronic and Computer-Engineering, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Electronic and Computer-Engineering, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047980942","display_name":"Zhen Yang","orcid":"https://orcid.org/0000-0002-4400-3823"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen Yang","raw_affiliation_strings":["College of Telecommunication & Information Engineering, Nanjing University of Posts and Telecommunications, China"],"affiliations":[{"raw_affiliation_string":"College of Telecommunication & Information Engineering, Nanjing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I41198531"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100385632"],"corresponding_institution_ids":["https://openalex.org/I41198531"],"apc_list":null,"apc_paid":null,"fwci":0.8144,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.80617521,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"829","last_page":"833"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.822185754776001},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.8213924765586853},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6227799654006958},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5396130084991455},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5209092497825623},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.49692919850349426},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4876657724380493},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4443328380584717},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.44237324595451355},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.43667060136795044},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4278488755226135},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4221838116645813},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.4176504611968994},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.40386393666267395},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35718339681625366},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34781548380851746}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.822185754776001},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.8213924765586853},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6227799654006958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5396130084991455},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5209092497825623},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.49692919850349426},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4876657724380493},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4443328380584717},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.44237324595451355},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.43667060136795044},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4278488755226135},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4221838116645813},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4176504611968994},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.40386393666267395},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35718339681625366},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34781548380851746},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/apsipa.2018.8659628","is_oa":false,"landing_page_url":"https://doi.org/10.23919/apsipa.2018.8659628","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W95152782","https://openalex.org/W1509691205","https://openalex.org/W1963637322","https://openalex.org/W1965288315","https://openalex.org/W1969728648","https://openalex.org/W2007023536","https://openalex.org/W2011378162","https://openalex.org/W2135832479","https://openalex.org/W2153057929","https://openalex.org/W2156477760","https://openalex.org/W2161476805","https://openalex.org/W2169579015","https://openalex.org/W2290946177","https://openalex.org/W2467604901","https://openalex.org/W2473388484","https://openalex.org/W2475998840","https://openalex.org/W2515020857","https://openalex.org/W2518172956","https://openalex.org/W2518312472","https://openalex.org/W2532494225","https://openalex.org/W2651834199","https://openalex.org/W2745379946","https://openalex.org/W2748561347","https://openalex.org/W2749976445","https://openalex.org/W2890964092","https://openalex.org/W2962896155","https://openalex.org/W6603838645","https://openalex.org/W6684957941"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W2175373321","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2023466863","https://openalex.org/W2938358845"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3,29,35,44,110],"novel":[4],"approach":[5,88,149],"to":[6,116,127],"many-to-many":[7],"(M2M)":[8],"voice":[9],"conversion":[10,122],"for":[11],"non-parallel":[12],"training":[13,66,85,95,132],"data.":[14,86],"In":[15],"the":[16,40,49,54,65,69,74,81,131,151],"proposed":[17,148],"approach,":[18],"we":[19,58],"first":[20],"obtain":[21],"bottleneck":[22],"features":[23],"(BNFs)":[24],"as":[25],"speaker":[26,55,136],"representations":[27],"from":[28,124],"deep":[30],"neural":[31],"network":[32],"(DNN).":[33],"Then,":[34],"variational":[36],"autoencoder":[37],"(VAE)":[38],"implements":[39],"mapping":[41],"function":[42],"(i.e.,":[43],"reconstruction":[45],"process)":[46],"using":[47],"both":[48],"latent":[50],"semantic":[51],"information":[52],"and":[53,142],"representations.":[56],"Furthermore,":[57],"propose":[59],"an":[60],"adaptive":[61],"scheme":[62],"by":[63],"intervening":[64],"process":[67,101],"of":[68,83,134],"DNN,":[70],"which":[71],"can":[72],"enrich":[73],"target":[75,135],"speaker's":[76],"personality":[77],"feature":[78],"space":[79],"in":[80],"case":[82],"limited":[84],"Our":[87],"has":[89],"three":[90],"advantages:":[91],"1)":[92],"neither":[93],"parallel":[94],"data":[96,133],"nor":[97],"explicit":[98],"frame":[99],"alignment":[100],"is":[102,137],"required;":[103],"2)":[104],"consolidates":[105],"multiple":[106],"pair-wise":[107],"systems":[108],"into":[109],"single":[111],"M2M":[112,121],"model":[113],"(many-source":[114],"speakers":[115],"many-target":[117],"speakers);":[118],"3)":[119],"expands":[120],"task":[123],"closed":[125],"set":[126,129],"open":[128],"when":[130],"very":[138],"limited.":[139],"The":[140],"objective":[141],"subjective":[143],"evaluations":[144],"show":[145],"that":[146],"our":[147],"outperforms":[150],"baseline":[152],"system.":[153]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
