{"id":"https://openalex.org/W2939131199","doi":"https://doi.org/10.1109/icassp.2019.8682589","title":"Attention-based Wavenet Autoencoder for Universal Voice Conversion","display_name":"Attention-based Wavenet Autoencoder for Universal Voice Conversion","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2939131199","doi":"https://doi.org/10.1109/icassp.2019.8682589","mag":"2939131199"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2019.8682589","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682589","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038017098","display_name":"Adam Polyak","orcid":"https://orcid.org/0000-0003-2563-2111"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Adam Polyak","raw_affiliation_strings":["Facebook AI Research"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078102229","display_name":"Lior Wolf","orcid":"https://orcid.org/0000-0001-5578-8892"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Lior Wolf","raw_affiliation_strings":["Facebook AI Research"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research","institution_ids":["https://openalex.org/I2252078561"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5038017098"],"corresponding_institution_ids":["https://openalex.org/I2252078561"],"apc_list":null,"apc_paid":null,"fwci":3.3604,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.93920017,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"6800","last_page":"6804"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.9235749244689941},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7852791547775269},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7593505382537842},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7345293760299683},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5737239122390747},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.510006308555603},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.49361956119537354},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.41999173164367676}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.9235749244689941},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7852791547775269},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7593505382537842},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7345293760299683},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5737239122390747},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.510006308555603},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.49361956119537354},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.41999173164367676},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2019.8682589","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682589","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W1731081199","https://openalex.org/W1810943226","https://openalex.org/W1959608418","https://openalex.org/W2161476805","https://openalex.org/W2397499194","https://openalex.org/W2518312472","https://openalex.org/W2519091744","https://openalex.org/W2532494225","https://openalex.org/W2591927543","https://openalex.org/W2604184139","https://openalex.org/W2606176153","https://openalex.org/W2619368999","https://openalex.org/W2651834199","https://openalex.org/W2739748921","https://openalex.org/W2766812927","https://openalex.org/W2769810959","https://openalex.org/W2803963372","https://openalex.org/W2889329491","https://openalex.org/W2901997113","https://openalex.org/W2917245127","https://openalex.org/W2917688842","https://openalex.org/W2949382160","https://openalex.org/W2962739369","https://openalex.org/W2962896155","https://openalex.org/W2963035245","https://openalex.org/W2963233633","https://openalex.org/W2963285578","https://openalex.org/W2963403868","https://openalex.org/W2963534259","https://openalex.org/W2963609956","https://openalex.org/W2963712897","https://openalex.org/W2963782041","https://openalex.org/W2964243274","https://openalex.org/W2964281804","https://openalex.org/W4294619240","https://openalex.org/W4298174729","https://openalex.org/W4298857617","https://openalex.org/W4385245566","https://openalex.org/W6637618735","https://openalex.org/W6638273328","https://openalex.org/W6640963894","https://openalex.org/W6677973343","https://openalex.org/W6685562342","https://openalex.org/W6712160772","https://openalex.org/W6728445508","https://openalex.org/W6734815144","https://openalex.org/W6736723571","https://openalex.org/W6738277540","https://openalex.org/W6739901393","https://openalex.org/W6741832134","https://openalex.org/W6745569068","https://openalex.org/W6745697700","https://openalex.org/W6746700228","https://openalex.org/W6748573829","https://openalex.org/W6752096078","https://openalex.org/W6756197946","https://openalex.org/W6917585676"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2566616303","https://openalex.org/W2159052453","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W4386815338","https://openalex.org/W2145836866","https://openalex.org/W2803255133"],"abstract_inverted_index":{"We":[0],"present":[1,78],"a":[2,9,17,24,66,79,115],"method":[3,13,99],"for":[4],"converting":[5],"any":[6,91],"voice":[7,68],"to":[8,55,73,88,102],"target":[10,75],"voice.":[11],"The":[12],"is":[14,44,86,100],"based":[15],"on":[16],"WaveNet":[18],"autoencoder,":[19],"with":[20],"the":[21,30,35,38,42,52,57,74,97,104,107,111],"addition":[22],"of":[23,32,106],"novel":[25],"attention":[26,43],"component":[27],"that":[28,85,96,110],"supports":[29],"modification":[31],"timing":[33,59,105],"between":[34],"input":[36],"and":[37,109],"output":[39],"samples.":[40],"Training":[41],"done":[45],"in":[46],"an":[47,61],"unsupervised":[48],"way,":[49],"by":[50],"teaching":[51],"neural":[53],"network":[54],"recover":[56,103],"original":[58],"from":[60],"artificially":[62],"modified":[63],"one.":[64],"Adding":[65],"generic":[67],"robot,":[69],"which":[70],"we":[71,77],"convert":[72],"voice,":[76],"robust":[80],"Text":[81,117],"To":[82,118],"Speech":[83,119],"pipeline":[84,113],"able":[87,101],"train":[89],"without":[90],"transcript.":[92],"Our":[93],"experiments":[94],"show":[95],"proposed":[98,112],"speaker":[108],"provides":[114],"competitive":[116],"method.":[120]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
