{"id":"https://openalex.org/W4372260157","doi":"https://doi.org/10.1109/icassp49357.2023.10095842","title":"Nonparallel Emotional Voice Conversion for Unseen Speaker-Emotion Pairs Using Dual Domain Adversarial Network &amp; Virtual Domain Pairing","display_name":"Nonparallel Emotional Voice Conversion for Unseen Speaker-Emotion Pairs Using Dual Domain Adversarial Network &amp; Virtual Domain Pairing","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372260157","doi":"https://doi.org/10.1109/icassp49357.2023.10095842"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095842","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095842","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091143209","display_name":"Nirmesh J. Shah","orcid":"https://orcid.org/0000-0002-7294-6757"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nirmesh Shah","raw_affiliation_strings":["Sony Research India"],"affiliations":[{"raw_affiliation_string":"Sony Research India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100746898","display_name":"Mayank Singh","orcid":"https://orcid.org/0000-0001-7261-6347"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mayank Singh","raw_affiliation_strings":["Sony Research India"],"affiliations":[{"raw_affiliation_string":"Sony Research India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101589290","display_name":"Naoya Takahashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naoya Takahashi","raw_affiliation_strings":["Sony Group Corporation,Japan","Sony Group Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017109333","display_name":"Naoyuki Onoe","orcid":"https://orcid.org/0000-0002-8709-7241"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naoyuki Onoe","raw_affiliation_strings":["Sony Research India"],"affiliations":[{"raw_affiliation_string":"Sony Research India","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5091143209"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3903,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.84505339,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7295997142791748},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.6567582488059998},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6104815602302551},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5283623337745667},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.5235751867294312},{"id":"https://openalex.org/keywords/emotion-classification","display_name":"Emotion classification","score":0.48790690302848816},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.47669336199760437},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.4637044072151184},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.44342365860939026},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.42853277921676636},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.4235772490501404},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.412354975938797},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.0966247022151947},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0805288553237915}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7295997142791748},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.6567582488059998},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6104815602302551},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5283623337745667},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.5235751867294312},{"id":"https://openalex.org/C206310091","wikidata":"https://www.wikidata.org/wiki/Q750859","display_name":"Emotion classification","level":2,"score":0.48790690302848816},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.47669336199760437},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4637044072151184},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.44342365860939026},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.42853277921676636},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.4235772490501404},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.412354975938797},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0966247022151947},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0805288553237915},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095842","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095842","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2040587156","https://openalex.org/W2161736993","https://openalex.org/W2511640485","https://openalex.org/W2517513811","https://openalex.org/W2883743124","https://openalex.org/W2899361462","https://openalex.org/W2938833595","https://openalex.org/W2962788625","https://openalex.org/W2972366998","https://openalex.org/W3015336668","https://openalex.org/W3015338123","https://openalex.org/W3025680351","https://openalex.org/W3034600949","https://openalex.org/W3095169545","https://openalex.org/W3096791258","https://openalex.org/W3096939667","https://openalex.org/W3147311044","https://openalex.org/W3163573274","https://openalex.org/W3168292814","https://openalex.org/W3194143312","https://openalex.org/W3196643119","https://openalex.org/W3196667132","https://openalex.org/W3197993066","https://openalex.org/W3205065526","https://openalex.org/W4205742757","https://openalex.org/W4224301045","https://openalex.org/W4225264140","https://openalex.org/W6761075046","https://openalex.org/W6775580011","https://openalex.org/W6793489163","https://openalex.org/W6796575454","https://openalex.org/W6802762330"],"related_works":["https://openalex.org/W2995777218","https://openalex.org/W2366944513","https://openalex.org/W1550318927","https://openalex.org/W4305042383","https://openalex.org/W2546649374","https://openalex.org/W2773396412","https://openalex.org/W4380854332","https://openalex.org/W2184859701","https://openalex.org/W4386232293","https://openalex.org/W4380370144"],"abstract_inverted_index":{"Primary":[0],"goal":[1],"of":[2,14,30,34,52,56,67,140],"an":[3],"emotional":[4,156],"voice":[5],"conversion":[6,107],"(EVC)":[7],"system":[8],"is":[9],"to":[10,22,108],"convert":[11,38],"the":[12,27,31,35,50,54,65,91,106,124,132,137,150],"emotion":[13,55,94],"a":[15,80,114,141,154],"given":[16],"speech":[17],"signal":[18],"from":[19],"one":[20],"style":[21,24,95],"another":[23],"without":[25,135],"modifying":[26],"linguistic":[28],"content":[29],"signal.":[32],"Most":[33],"state-of-the-art":[36],"approaches":[37],"emotions":[39],"for":[40,89],"seen":[41],"speaker-emotion":[42,73,110,125],"combinations":[43],"only.":[44],"In":[45],"this":[46,76],"paper,":[47],"we":[48,78,112],"tackle":[49],"problem":[51],"converting":[53],"speakers":[57],"whose":[58],"only":[59],"neutral":[60],"data":[61,134],"are":[62,128],"present":[63,130],"during":[64],"time":[66],"training":[68,119],"and":[69,93,143],"testing":[70],"(i.e.,":[71],"unseen":[72,109],"combinations).":[74],"To":[75],"end,":[77],"extend":[79],"recently":[81],"proposed":[82,151],"StartGANv2-VC":[83],"architecture":[84],"by":[85],"utilizing":[86],"dual":[87,100],"encoders":[88],"learning":[90],"speaker":[92],"embeddings":[96],"separately":[97],"along":[98],"with":[99],"domain":[101],"source":[102],"classifiers.":[103],"For":[104],"achieving":[105],"combinations,":[111],"propose":[113],"Virtual":[115],"Domain":[116],"Pairing":[117],"(VDP)":[118],"strategy,":[120],"which":[121],"virtually":[122],"incorporates":[123],"pairs":[126],"that":[127],"not":[129],"in":[131,145],"real":[133],"compromising":[136],"min-max":[138],"game":[139],"discriminator":[142],"generator":[144],"adversarial":[146],"training.":[147],"We":[148],"evaluate":[149],"method":[152],"using":[153],"Hindi":[155],"database.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
