{"id":"https://openalex.org/W4225316945","doi":"https://doi.org/10.1109/icassp43922.2022.9746368","title":"Text-Free Non-Parallel Many-To-Many Voice Conversion Using Normalising Flow","display_name":"Text-Free Non-Parallel Many-To-Many Voice Conversion Using Normalising Flow","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4225316945","doi":"https://doi.org/10.1109/icassp43922.2022.9746368"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746368","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746368","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038417409","display_name":"Thomas Merritt","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Thomas Merritt","raw_affiliation_strings":["Amazon Alexa"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027412678","display_name":"Abdelhamid Ezzerg","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abdelhamid Ezzerg","raw_affiliation_strings":["Amazon Alexa"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060480943","display_name":"Piotr Bili\u0144ski","orcid":"https://orcid.org/0000-0002-7023-3438"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Piotr Bilinski","raw_affiliation_strings":["Amazon Alexa"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036460629","display_name":"Magdalena Proszewska","orcid":"https://orcid.org/0000-0002-5523-2197"},"institutions":[{"id":"https://openalex.org/I126596746","display_name":"Jagiellonian University","ror":"https://ror.org/03bqmcz70","country_code":"PL","type":"education","lineage":["https://openalex.org/I126596746"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Magdalena Proszewska","raw_affiliation_strings":["Jagiellonian University,Poland","Jagiellonian University, Poland"],"affiliations":[{"raw_affiliation_string":"Jagiellonian University,Poland","institution_ids":["https://openalex.org/I126596746"]},{"raw_affiliation_string":"Jagiellonian University, Poland","institution_ids":["https://openalex.org/I126596746"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000904701","display_name":"Kamil Pokora","orcid":"https://orcid.org/0009-0006-0756-4118"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kamil Pokora","raw_affiliation_strings":["Amazon Alexa"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027755752","display_name":"Roberto Barra-Chicote","orcid":"https://orcid.org/0000-0003-0844-7037"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Roberto Barra-Chicote","raw_affiliation_strings":["Amazon Alexa"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039620116","display_name":"Daniel Korzekwa","orcid":"https://orcid.org/0000-0003-1470-4968"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Korzekwa","raw_affiliation_strings":["Amazon Alexa"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5038417409"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":0.831,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.73103857,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7819787263870239},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5449063777923584},{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.5089938640594482},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.49429965019226074},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.4487876892089844},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.44874662160873413},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.44639337062835693},{"id":"https://openalex.org/keywords/text-messaging","display_name":"Text messaging","score":0.431427925825119},{"id":"https://openalex.org/keywords/source-text","display_name":"Source text","score":0.4308197796344757},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.4240812063217163},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3337947726249695},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31688639521598816}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7819787263870239},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5449063777923584},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.5089938640594482},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.49429965019226074},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.4487876892089844},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.44874662160873413},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.44639337062835693},{"id":"https://openalex.org/C3018949938","wikidata":"https://www.wikidata.org/wiki/Q17166101","display_name":"Text messaging","level":2,"score":0.431427925825119},{"id":"https://openalex.org/C120012220","wikidata":"https://www.wikidata.org/wiki/Q1754533","display_name":"Source text","level":2,"score":0.4308197796344757},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.4240812063217163},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3337947726249695},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31688639521598816},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.0},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746368","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746368","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5799999833106995,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W2576309025","https://openalex.org/W2752796333","https://openalex.org/W2912298597","https://openalex.org/W2945478979","https://openalex.org/W2947196194","https://openalex.org/W2962788625","https://openalex.org/W2963035245","https://openalex.org/W2963300588","https://openalex.org/W2963799213","https://openalex.org/W2964243274","https://openalex.org/W2992005611","https://openalex.org/W3015282541","https://openalex.org/W3022876224","https://openalex.org/W3025528898","https://openalex.org/W3026874504","https://openalex.org/W3034794073","https://openalex.org/W3096323553","https://openalex.org/W3098557217","https://openalex.org/W3161492781","https://openalex.org/W3162770051","https://openalex.org/W3195171908","https://openalex.org/W3196584150","https://openalex.org/W3198082505","https://openalex.org/W4288337064","https://openalex.org/W4289761690","https://openalex.org/W4294619240","https://openalex.org/W6752910514","https://openalex.org/W6758706709","https://openalex.org/W6762533536","https://openalex.org/W6770970757","https://openalex.org/W6776390925","https://openalex.org/W6777694618","https://openalex.org/W6778083308","https://openalex.org/W6781662123","https://openalex.org/W6843673214"],"related_works":["https://openalex.org/W2580650124","https://openalex.org/W4386190339","https://openalex.org/W2968424575","https://openalex.org/W2029790457","https://openalex.org/W3142333283","https://openalex.org/W3122088529","https://openalex.org/W2085259108","https://openalex.org/W3181673064","https://openalex.org/W3041320102","https://openalex.org/W4223917824"],"abstract_inverted_index":{"Non-parallel":[0],"voice":[1],"conversion":[2],"(VC)":[3],"is":[4,20,30,32,37,141],"typically":[5],"achieved":[6],"using":[7],"lossy":[8],"representations":[9],"of":[10,50,87,95,138],"the":[11,27,41,51,85,88,134,139],"source":[12,28],"speech.":[13,96],"However,":[14],"ensuring":[15],"only":[16],"speaker":[17],"identity":[18],"information":[19,25],"dropped":[21],"whilst":[22],"all":[23],"other":[24],"from":[26],"speech":[29],"retained":[31],"a":[33,92],"large":[34],"challenge.":[35],"This":[36],"particularly":[38],"challenging":[39],"in":[40,103,131],"scenario":[42],"where":[43],"at":[44],"inference-time":[45],"we":[46,61,113],"have":[47,67,74],"no":[48,123],"knowledge":[49],"text":[52],"being":[53],"read,":[54],"i.e.,":[55],"text-free":[56,107,111,126,146],"VC.":[57,78],"To":[58],"mitigate":[59],"this,":[60],"investigate":[62,98],"information-preserving":[63],"VC":[64,102,112,120,147],"approaches.Normalising":[65],"flows":[66,100],"gained":[68],"attention":[69],"for":[70,77,101,110],"text-to-speech":[71],"synthesis,":[72],"however":[73],"been":[75],"under-explored":[76],"Flows":[79],"utilize":[80],"invertible":[81],"functions":[82],"to":[83,143],"learn":[84],"likelihood":[86],"data,":[89],"thus":[90],"provide":[91],"lossless":[93],"encoding":[94],"We":[97],"normalising":[99],"both":[104],"text-conditioned":[105,128],"and":[106,116,127],"scenarios.":[108],"Furthermore,":[109],"compare":[114],"pre-trained":[115],"jointly-learnt":[117],"priors.":[118],"Flow-based":[119],"evaluations":[121],"show":[122],"degradation":[124],"between":[125],"VC,":[129],"resulting":[130],"improvements":[132],"over":[133],"state-of-the-art.":[135],"Also,":[136],"joint-training":[137],"prior":[140],"found":[142],"negatively":[144],"impact":[145],"quality.":[148]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
