{"id":"https://openalex.org/W4312637065","doi":"https://doi.org/10.1109/tmm.2022.3222646","title":"Speaker-Independent Emotional Voice Conversion via Disentangled Representations","display_name":"Speaker-Independent Emotional Voice Conversion via Disentangled Representations","publication_year":2022,"publication_date":"2022-11-16","ids":{"openalex":"https://openalex.org/W4312637065","doi":"https://doi.org/10.1109/tmm.2022.3222646"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2022.3222646","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3222646","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065285274","display_name":"Xunquan Chen","orcid":"https://orcid.org/0000-0002-5336-6386"},"institutions":[{"id":"https://openalex.org/I65837984","display_name":"Kobe University","ror":"https://ror.org/03tgsfw79","country_code":"JP","type":"education","lineage":["https://openalex.org/I65837984"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Xunquan Chen","raw_affiliation_strings":["Graduate School of System Informatics, Kobe University, Kobe, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of System Informatics, Kobe University, Kobe, Japan","institution_ids":["https://openalex.org/I65837984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081820690","display_name":"Xuexin Xu","orcid":"https://orcid.org/0000-0003-0055-4882"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuexin Xu","raw_affiliation_strings":["Xiamen University, Xiamen, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100627380","display_name":"Jinhui Chen","orcid":"https://orcid.org/0000-0002-3701-9026"},"institutions":[{"id":"https://openalex.org/I122146667","display_name":"Prefectural University of Hiroshima","ror":"https://ror.org/0059h1f24","country_code":"JP","type":"education","lineage":["https://openalex.org/I122146667"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jinhui Chen","raw_affiliation_strings":["Prefectural University of Hiroshima, Hiroshima, Japan"],"affiliations":[{"raw_affiliation_string":"Prefectural University of Hiroshima, Hiroshima, Japan","institution_ids":["https://openalex.org/I122146667"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112884857","display_name":"Zhizhong Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhizhong Zhang","raw_affiliation_strings":["Xiamen University, Xiamen, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009283470","display_name":"Tetsuya Takiguchi","orcid":"https://orcid.org/0000-0001-5005-7679"},"institutions":[{"id":"https://openalex.org/I65837984","display_name":"Kobe University","ror":"https://ror.org/03tgsfw79","country_code":"JP","type":"education","lineage":["https://openalex.org/I65837984"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tetsuya Takiguchi","raw_affiliation_strings":["Graduate School of System Informatics, Kobe University, Kobe, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of System Informatics, Kobe University, Kobe, Japan","institution_ids":["https://openalex.org/I65837984"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074364400","display_name":"Edwin R. Hancock","orcid":"https://orcid.org/0000-0003-4496-2028"},"institutions":[{"id":"https://openalex.org/I52099693","display_name":"University of York","ror":"https://ror.org/04m01e293","country_code":"GB","type":"education","lineage":["https://openalex.org/I52099693"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Edwin R. Hancock","raw_affiliation_strings":["Department of Computer Science, University of York, York, U.K"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of York, York, U.K","institution_ids":["https://openalex.org/I52099693"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5065285274"],"corresponding_institution_ids":["https://openalex.org/I65837984"],"apc_list":null,"apc_paid":null,"fwci":1.1919,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.78410607,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"25","issue":null,"first_page":"7480","last_page":"7493"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7945644855499268},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7104929685592651},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6688621044158936},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5481935739517212},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5401360988616943},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.48287251591682434},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4709526002407074},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4573182463645935},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.4449712634086609},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4324198365211487},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2776041626930237}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7945644855499268},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7104929685592651},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6688621044158936},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5481935739517212},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5401360988616943},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.48287251591682434},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4709526002407074},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4573182463645935},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.4449712634086609},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4324198365211487},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2776041626930237},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tmm.2022.3222646","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3222646","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},{"id":"pmh:oai:eprints.whiterose.ac.uk:193498","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400854","display_name":"White Rose Research Online (University of Leeds, The University of Sheffield, University of York)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2800616092","host_organization_name":"White Rose University Consortium","host_organization_lineage":["https://openalex.org/I2800616092"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W2040587156","https://openalex.org/W2077801020","https://openalex.org/W2089624917","https://openalex.org/W2107860279","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2399915284","https://openalex.org/W2471520273","https://openalex.org/W2476548250","https://openalex.org/W2502312327","https://openalex.org/W2511640485","https://openalex.org/W2603777577","https://openalex.org/W2885005742","https://openalex.org/W2889329491","https://openalex.org/W2899361462","https://openalex.org/W2962793481","https://openalex.org/W2963539064","https://openalex.org/W2963609956","https://openalex.org/W2963767194","https://openalex.org/W2963830550","https://openalex.org/W2963890275","https://openalex.org/W2970007912","https://openalex.org/W2970737019","https://openalex.org/W2972659941","https://openalex.org/W2993842823","https://openalex.org/W3015212100","https://openalex.org/W3015241559","https://openalex.org/W3016151052","https://openalex.org/W3025680351","https://openalex.org/W3094378471","https://openalex.org/W3095930733","https://openalex.org/W3096939667","https://openalex.org/W3129015415","https://openalex.org/W3135203219","https://openalex.org/W3136699727","https://openalex.org/W3154848313","https://openalex.org/W3161436426","https://openalex.org/W3163573274","https://openalex.org/W3166789519","https://openalex.org/W3175464388","https://openalex.org/W3194557739","https://openalex.org/W3197659778","https://openalex.org/W3197993066","https://openalex.org/W4243316134","https://openalex.org/W6631190155","https://openalex.org/W6724804524","https://openalex.org/W6735913928","https://openalex.org/W6752051073","https://openalex.org/W6761687776","https://openalex.org/W6762533536","https://openalex.org/W6767111847","https://openalex.org/W6779459370","https://openalex.org/W6790622591","https://openalex.org/W6804125026","https://openalex.org/W6844194202"],"related_works":["https://openalex.org/W4297807400","https://openalex.org/W1491159402","https://openalex.org/W4313854686","https://openalex.org/W321304764","https://openalex.org/W2249138175","https://openalex.org/W2611678594","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W289407349","https://openalex.org/W2029134149"],"abstract_inverted_index":{"Emotional":[0],"Voice":[1],"Conversion":[2],"(EVC)":[3],"technology":[4],"aims":[5],"to":[6,28,132,167],"transfer":[7],"emotional":[8,114],"state":[9],"in":[10,44,64,87,98,105,211],"speech":[11,143,193],"while":[12],"keeping":[13],"the":[14,30,45,59,75,84,129,134,158,169,172,188,195,203,208],"linguistic":[15],"information":[16,136,139,155],"and":[17,94,137,215,221],"speaker":[18,35],"identity":[19],"unchanged.":[20],"Prior":[21],"studies":[22],"on":[23],"EVC":[24,67,178,205],"have":[25,69],"been":[26],"limited":[27,70],"perform":[29],"conversion":[31,71,116],"for":[32,118,179,218],"a":[33,37,111],"specific":[34],"or":[36],"predefined":[38],"set":[39,60],"of":[40,61,77,140,171,191,213],"multiple":[41],"speakers":[42,51,62,120,181],"seen":[43,220],"training":[46,57,85,159,164],"stage.":[47],"When":[48],"encountering":[49],"arbitrary":[50,78,119,180],"that":[52,202],"may":[53],"be":[54,183],"unseen":[55,82,222],"during":[56,83],"(outside":[58],"used":[63],"training),":[65],"existing":[66],"methods":[68],"capabilities.":[72],"However,":[73],"converting":[74],"emotion":[76,135,189],"speakers,":[79],"even":[80],"those":[81],"procedure,":[86],"one":[88],"model":[89,206],"is":[90,165],"much":[91,95],"more":[92,96],"challenging":[93],"attractive":[97],"real-world":[99],"scenarios.":[100],"To":[101,148],"address":[102],"this":[103,106],"problem,":[104],"study,":[107],"we":[108,152],"propose":[109],"SIEVC,":[110],"novel":[112],"speaker-independent":[113,177],"voice":[115],"framework":[117,131],"via":[121],"disentangled":[122],"representation":[123,146],"learning.":[124],"The":[125,198],"proposed":[126,204],"method":[127],"employs":[128],"autoencoder":[130],"disentangle":[133],"emotion-independent":[138],"each":[141],"input":[142],"into":[144,157],"separated":[145],"spaces.":[147],"achieve":[149],"better":[150],"disentanglement,":[151],"incorporate":[153],"mutual":[154],"minimization":[156],"process.":[160],"In":[161],"addition,":[162],"adversarial":[163],"applied":[166],"enhance":[168],"quality":[170],"generated":[173],"audio":[174],"signals.":[175],"Finally,":[176],"could":[182],"achieved":[184],"by":[185],"only":[186],"replacing":[187],"representations":[190],"source":[192],"with":[194],"target":[196],"ones.":[197],"experimental":[199],"results":[200],"demonstrate":[201],"outperforms":[207],"baseline":[209],"models":[210],"terms":[212],"objective":[214],"subjective":[216],"evaluation":[217],"both":[219],"speakers.":[223]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
