{"id":"https://openalex.org/W3213313228","doi":"https://doi.org/10.1109/asru51503.2021.9688277","title":"AC-VC: Non-Parallel Low Latency Phonetic Posteriorgrams Based Voice Conversion","display_name":"AC-VC: Non-Parallel Low Latency Phonetic Posteriorgrams Based Voice Conversion","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W3213313228","doi":"https://doi.org/10.1109/asru51503.2021.9688277","mag":"3213313228"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9688277","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688277","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2111.06601","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025679202","display_name":"Damien Ronssin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210148785","display_name":"Logitech (Switzerland)","ror":"https://ror.org/05pkpss54","country_code":"CH","type":"company","lineage":["https://openalex.org/I4210148785"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Damien Ronssin","raw_affiliation_strings":["Logitech Europe S.A.,Lausanne,Switzerland,1015"],"affiliations":[{"raw_affiliation_string":"Logitech Europe S.A.,Lausanne,Switzerland,1015","institution_ids":["https://openalex.org/I4210148785"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003558571","display_name":"Milo\u0161 Cer\u0148ak","orcid":"https://orcid.org/0000-0002-5569-9491"},"institutions":[{"id":"https://openalex.org/I4210148785","display_name":"Logitech (Switzerland)","ror":"https://ror.org/05pkpss54","country_code":"CH","type":"company","lineage":["https://openalex.org/I4210148785"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Milos Cernak","raw_affiliation_strings":["Logitech Europe S.A.,Lausanne,Switzerland,1015"],"affiliations":[{"raw_affiliation_string":"Logitech Europe S.A.,Lausanne,Switzerland,1015","institution_ids":["https://openalex.org/I4210148785"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5025679202"],"corresponding_institution_ids":["https://openalex.org/I4210148785"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13293303,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"710","last_page":"716"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.7422982454299927},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7252081036567688},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6894215941429138},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6735744476318359},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.49527087807655334},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.48029467463493347},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4306918978691101},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23797640204429626},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.12235608696937561}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.7422982454299927},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7252081036567688},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6894215941429138},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6735744476318359},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.49527087807655334},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.48029467463493347},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4306918978691101},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23797640204429626},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.12235608696937561},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/asru51503.2021.9688277","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688277","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2111.06601","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2111.06601","pdf_url":"https://arxiv.org/pdf/2111.06601","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3213313228","is_oa":true,"landing_page_url":"http://arxiv.org/pdf/2111.06601.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2111.06601","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2111.06601","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2111.06601","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2111.06601","pdf_url":"https://arxiv.org/pdf/2111.06601","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3213313228.pdf","grobid_xml":"https://content.openalex.org/works/W3213313228.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1524333225","https://openalex.org/W2471520273","https://openalex.org/W2473388484","https://openalex.org/W2518172956","https://openalex.org/W2527729766","https://openalex.org/W2729190387","https://openalex.org/W2949281321","https://openalex.org/W2949382160","https://openalex.org/W2962780374","https://openalex.org/W2963091184","https://openalex.org/W2963609956","https://openalex.org/W2964307104","https://openalex.org/W2973216307","https://openalex.org/W2994715919","https://openalex.org/W3015338123","https://openalex.org/W3025844872","https://openalex.org/W3082130377","https://openalex.org/W3092353990","https://openalex.org/W3094923859","https://openalex.org/W4244543785","https://openalex.org/W6631362777","https://openalex.org/W6726528559","https://openalex.org/W6736996214","https://openalex.org/W6748409065","https://openalex.org/W6762533536","https://openalex.org/W6784299611","https://openalex.org/W6840412704"],"related_works":["https://openalex.org/W3196667132","https://openalex.org/W3213210955","https://openalex.org/W3092353990","https://openalex.org/W3034982560","https://openalex.org/W3208926273","https://openalex.org/W3095498130","https://openalex.org/W3005653393","https://openalex.org/W3109037759","https://openalex.org/W3168719651","https://openalex.org/W2909726860","https://openalex.org/W2898654681","https://openalex.org/W3213785244","https://openalex.org/W3117936886","https://openalex.org/W2785765285","https://openalex.org/W2563906718","https://openalex.org/W3163475957","https://openalex.org/W2950865938","https://openalex.org/W3129442494","https://openalex.org/W3100237905","https://openalex.org/W3024476726"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"AC-VC":[3,90],"(Almost":[4],"Causal":[5],"Voice":[6,101],"Conversion),":[7],"a":[8,63,66,74,80,108],"phonetic":[9],"posteriorgrams":[10],"based":[11],"voice":[12,19,47,137],"conversion":[13,20,48,138],"system":[14,30,91],"that":[15,117],"can":[16],"perform":[17],"any-to-many":[18],"while":[21],"having":[22],"only":[23],"57.5":[24],"ms":[25],"future":[26,70,75,119],"look-ahead.":[27],"The":[28],"complete":[29],"is":[31,129],"composed":[32],"of":[33,44,55,69,99,110,127,134],"three":[34],"neural":[35],"networks":[36],"trained":[37],"separately":[38],"with":[39,94,107],"non-parallel":[40],"data.":[41],"While":[42],"most":[43],"the":[45,88,95,100,114,132],"current":[46,135],"systems":[49],"focus":[50],"primarily":[51],"on":[52,61],"quality":[53],"irrespective":[54],"algorithmic":[56],"latency,":[57],"this":[58,86],"work":[59],"elaborates":[60],"designing":[62],"method":[64],"using":[65],"minimal":[67],"amount":[68],"context":[71,120],"thus":[72],"allowing":[73],"real-time":[76],"implementation.":[77],"According":[78],"to":[79],"subjective":[81],"listening":[82],"test":[83],"organized":[84],"in":[85,105],"work,":[87],"proposed":[89],"achieves":[92],"parity":[93],"non-causal":[96],"ASR-TTS":[97],"baseline":[98],"Conversion":[102],"Challenge":[103],"2020":[104],"naturalness":[106],"MOS":[109],"3.5.":[111],"In":[112],"contrast,":[113],"results":[115],"indicate":[116],"missing":[118],"impacts":[121],"speaker":[122],"similarity.":[123],"Obtained":[124],"similarity":[125,133],"percentage":[126],"65%":[128],"lower":[130],"than":[131],"best":[136],"systems.":[139]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
