{"id":"https://openalex.org/W3097952294","doi":"https://doi.org/10.21437/interspeech.2020-1710","title":"GAZEV: GAN-Based Zero-Shot Voice Conversion Over Non-Parallel Speech Corpus","display_name":"GAZEV: GAN-Based Zero-Shot Voice Conversion Over Non-Parallel Speech Corpus","publication_year":2020,"publication_date":"2020-10-25","ids":{"openalex":"https://openalex.org/W3097952294","doi":"https://doi.org/10.21437/interspeech.2020-1710","mag":"3097952294"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2020-1710","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1710","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030460543","display_name":"Zining Zhang","orcid":"https://orcid.org/0000-0002-7992-426X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zining Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039946576","display_name":"Bingsheng He","orcid":"https://orcid.org/0000-0001-8618-4581"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bingsheng He","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100633293","display_name":"Zhenjie Zhang","orcid":"https://orcid.org/0000-0003-1079-8784"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhenjie Zhang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5030460543"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4952,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86672801,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"791","last_page":"795"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9593999981880188,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.9552000164985657,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.6753743290901184},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6528576612472534},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5884901285171509},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.46732985973358154},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.41155076026916504},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.1531837284564972},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10350164771080017}],"concepts":[{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.6753743290901184},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6528576612472534},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5884901285171509},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.46732985973358154},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.41155076026916504},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.1531837284564972},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10350164771080017},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2020-1710","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1710","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4214877189","https://openalex.org/W2074502265","https://openalex.org/W2381179799","https://openalex.org/W4287637665","https://openalex.org/W2366718574","https://openalex.org/W2359774528","https://openalex.org/W4298312966","https://openalex.org/W2728912566"],"abstract_inverted_index":{"Non-parallel":[0],"many-to-many":[1],"voice":[2,16,75,98],"conversion":[3,17,76,99],"is":[4,66,119],"recently":[5],"attract-ing":[6],"huge":[7],"research":[8],"efforts":[9],"in":[10,37,139,149],"the":[11,35,38,44,48,63,120,129,143],"speech":[12,60,161],"processing":[13],"community.":[14],"A":[15],"system":[18],"transforms":[19],"an":[20,83],"utterance":[21,28,40],"of":[22,29,62,122,128,145],"a":[23,30],"source":[24,111],"speaker":[25,32,86,123,146,165],"to":[26,73,105,141,167],"another":[27],"target":[31,49,113],"by":[33,43],"keeping":[34],"content":[36],"original":[39],"and":[41,112,163],"replacing":[42],"vocal":[45],"features":[46],"from":[47],"speaker.":[50],"Existing":[51],"solutions,":[52],"e.g.,":[53],"StarGAN-VC2,":[54],"present":[55,93],"promising":[56],"results,":[57],"only":[58],"when":[59],"corpus":[61],"engaged":[64],"speakers":[65,108],"available":[67],"during":[68],"model":[69],"training.":[70],"AUTOVCis":[71],"able":[72],"perform":[74],"on":[77,109,126,159],"unseen":[78,107],"speakers,":[79],"but":[80],"it":[81],"needs":[82],"external":[84],"pretrained":[85],"verification":[87],"model.":[88],"In":[89],"this":[90],"paper,":[91],"we":[92],"our":[94],"new":[95],"GAN-based":[96],"zero-shot":[97],"solution,":[100],"called":[101],"GAZEV,":[102],"which":[103],"targets":[104],"support":[106],"both":[110],"utterances.":[114],"Our":[115,152],"key":[116],"technical":[117],"contribution":[118],"adoption":[121],"embedding":[124],"loss":[125],"top":[127],"GAN":[130],"framework,":[131],"as":[132,134],"well":[133],"adaptive":[135],"instance":[136],"normalization":[137],"strategy,":[138],"order":[140],"address":[142],"limitations":[144],"identity":[147],"transfer":[148],"existing":[150],"solutions.":[151],"empirical":[153],"evaluations":[154],"demonstrate":[155],"significant":[156],"performance":[157],"improvement":[158],"output":[160],"quality":[162],"comparable":[164],"similarity":[166],"AUTOVC.":[168]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":2}],"updated_date":"2026-04-04T06:10:10.580331","created_date":"2025-10-10T00:00:00"}
