{"id":"https://openalex.org/W2963830550","doi":"https://doi.org/10.21437/interspeech.2018-1830","title":"Multi-target Voice Conversion without Parallel Data by Adversarially Learning Disentangled Audio Representations","display_name":"Multi-target Voice Conversion without Parallel Data by Adversarially Learning Disentangled Audio Representations","publication_year":2018,"publication_date":"2018-08-28","ids":{"openalex":"https://openalex.org/W2963830550","doi":"https://doi.org/10.21437/interspeech.2018-1830","mag":"2963830550"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2018-1830","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-1830","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049062211","display_name":"Ju-Chieh Chou","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Ju-chieh Chou","raw_affiliation_strings":["College of Electrical Engineering and Computer Science, National Taiwan University"],"affiliations":[{"raw_affiliation_string":"College of Electrical Engineering and Computer Science, National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074874599","display_name":"Cheng-chieh Yeh","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Cheng-chieh Yeh","raw_affiliation_strings":["College of Electrical Engineering and Computer Science, National Taiwan University"],"affiliations":[{"raw_affiliation_string":"College of Electrical Engineering and Computer Science, National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040508737","display_name":"Hung-yi Lee","orcid":"https://orcid.org/0000-0002-9654-5747"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hung-yi Lee","raw_affiliation_strings":["College of Electrical Engineering and Computer Science, National Taiwan University"],"affiliations":[{"raw_affiliation_string":"College of Electrical Engineering and Computer Science, National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044010123","display_name":"Lin-shan Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Lin-shan Lee","raw_affiliation_strings":["College of Electrical Engineering and Computer Science, National Taiwan University"],"affiliations":[{"raw_affiliation_string":"College of Electrical Engineering and Computer Science, National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5049062211"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":18.24155743,"has_fulltext":false,"cited_by_count":135,"citation_normalized_percentile":{"value":0.9944993,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"501","last_page":"505"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7860788702964783},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7371054887771606},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.649671733379364},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.6372852325439453},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5558006763458252},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5419102907180786},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5295392274856567},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4813002645969391},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4391917586326599},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3968289792537689},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3381919860839844},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2951343059539795},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.11548370122909546}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7860788702964783},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7371054887771606},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.649671733379364},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.6372852325439453},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5558006763458252},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5419102907180786},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5295392274856567},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4813002645969391},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4391917586326599},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3968289792537689},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3381919860839844},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2951343059539795},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.11548370122909546},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2018-1830","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-1830","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7099999785423279,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1509691205","https://openalex.org/W1523372075","https://openalex.org/W1959608418","https://openalex.org/W1987992317","https://openalex.org/W2056852181","https://openalex.org/W2057609679","https://openalex.org/W2086796102","https://openalex.org/W2105160541","https://openalex.org/W2120605154","https://openalex.org/W2127520494","https://openalex.org/W2148846882","https://openalex.org/W2156142001","https://openalex.org/W2157412983","https://openalex.org/W2396025094","https://openalex.org/W2476548250","https://openalex.org/W2502312327","https://openalex.org/W2532494225","https://openalex.org/W2547364378","https://openalex.org/W2608207374","https://openalex.org/W2619034550","https://openalex.org/W2621350877","https://openalex.org/W2651834199","https://openalex.org/W2758785877","https://openalex.org/W2774848319","https://openalex.org/W2950776302","https://openalex.org/W2962896155","https://openalex.org/W2962974898","https://openalex.org/W2963073614","https://openalex.org/W2963808252","https://openalex.org/W2964135678","https://openalex.org/W4295521014","https://openalex.org/W4298426053","https://openalex.org/W4320013936"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2149220986","https://openalex.org/W1493012537","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W1999004162","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2023466863","https://openalex.org/W2696990509"],"abstract_inverted_index":{"Recently,":[0],"cycle-consistent":[1],"adversarial":[2,37],"network":[3],"(Cycle-GAN)":[4],"has":[5,180],"been":[6,181],"successfully":[7],"applied":[8],"to":[9,12,51,55,79,93,113],"voice":[10,41,54,116,165,169],"conversion":[11,170],"a":[13,45],"different":[14,57],"speaker":[15,66,85,91,108,120,151,178],"without":[16,60],"parallel":[17,61],"data,":[18,62],"although":[19],"in":[20,72,158],"those":[21],"approaches":[22],"an":[23,36],"individual":[24],"model":[25,47],"is":[26,76,133],"needed":[27],"for":[28,40],"each":[29],"target":[30,107,119,150],"speaker.In":[31],"this":[32],"paper,":[33],"we":[34],"propose":[35],"learning":[38],"framework":[39],"conversion,":[42],"with":[43,121,138],"which":[44],"single":[46],"can":[48],"be":[49],"trained":[50,78],"convert":[52],"the":[53,65,69,95,101,106,111,115,118,122,126,139,159,177,185],"many":[56],"speakers,":[58],"all":[59],"by":[63,136,143],"separating":[64],"characteristics":[67],"from":[68,184],"linguistic":[70,123],"content":[71,124],"speech":[73],"signals.An":[74],"autoencoder":[75],"first":[77],"extract":[80],"speaker-independent":[81,102],"latent":[82,96,103,186],"representations":[83],"and":[84,105,148,162],"embedding":[86,109],"separately":[87],"using":[88],"another":[89,144],"auxiliary":[90],"classifier":[92],"regularize":[94],"representation.The":[97],"decoder":[98,131],"then":[99],"takes":[100],"representation":[104],"as":[110],"input":[112],"generate":[114],"of":[117,125,130,146,154],"source":[127],"utterance.The":[128],"quality":[129,166],"output":[132],"further":[134],"improved":[135],"patching":[137],"residual":[140],"signal":[141],"produced":[142],"pair":[145],"generator":[147],"discriminator.A":[149],"set":[152],"size":[153],"20":[155],"was":[156,167],"tested":[157],"preliminary":[160],"experiments,":[161],"very":[163],"good":[164],"obtained.Conventional":[168],"metrics":[171],"are":[172],"reported.We":[173],"also":[174],"show":[175],"that":[176],"information":[179],"properly":[182],"reduced":[183],"representations.":[187]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":13},{"year":2021,"cited_by_count":36},{"year":2020,"cited_by_count":42},{"year":2019,"cited_by_count":17},{"year":2018,"cited_by_count":6}],"updated_date":"2026-02-20T08:17:22.645390","created_date":"2025-10-10T00:00:00"}
