{"id":"https://openalex.org/W3162512456","doi":"https://doi.org/10.1109/icassp39728.2021.9414851","title":"Maskcyclegan-VC: Learning Non-Parallel Voice Conversion with Filling in Frames","display_name":"Maskcyclegan-VC: Learning Non-Parallel Voice Conversion with Filling in Frames","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3162512456","doi":"https://doi.org/10.1109/icassp39728.2021.9414851","mag":"3162512456"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9414851","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414851","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020693766","display_name":"Takuhiro Kaneko","orcid":"https://orcid.org/0009-0000-8016-5144"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takuhiro Kaneko","raw_affiliation_strings":["NTT Communication Science Laboratories, NTT Corporation, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001243214","display_name":"Hirokazu Kameoka","orcid":"https://orcid.org/0000-0003-3102-0162"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hirokazu Kameoka","raw_affiliation_strings":["NTT Communication Science Laboratories, NTT Corporation, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106710403","display_name":"Kou Tanaka","orcid":"https://orcid.org/0009-0003-7107-607X"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kou Tanaka","raw_affiliation_strings":["NTT Communication Science Laboratories, NTT Corporation, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079710814","display_name":"Nobukatsu Hojo","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Nobukatsu Hojo","raw_affiliation_strings":["NTT Communication Science Laboratories, NTT Corporation, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5020693766"],"corresponding_institution_ids":["https://openalex.org/I2251713219"],"apc_list":null,"apc_paid":null,"fwci":6.9965,"has_fulltext":false,"cited_by_count":72,"citation_normalized_percentile":{"value":0.97443911,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5919","last_page":"5923"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8023601770401001},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.752557098865509},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.6638221740722656},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5945041179656982},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.4983394145965576},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4781273901462555},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4568929672241211},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3422391414642334},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32233723998069763}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8023601770401001},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.752557098865509},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.6638221740722656},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5945041179656982},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.4983394145965576},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4781273901462555},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4568929672241211},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3422391414642334},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32233723998069763},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9414851","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414851","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":73,"referenced_works":["https://openalex.org/W1509691205","https://openalex.org/W1522301498","https://openalex.org/W1959608418","https://openalex.org/W2017742648","https://openalex.org/W2022125261","https://openalex.org/W2056852181","https://openalex.org/W2095705004","https://openalex.org/W2099471712","https://openalex.org/W2120605154","https://openalex.org/W2142300631","https://openalex.org/W2148846882","https://openalex.org/W2156142001","https://openalex.org/W2157412983","https://openalex.org/W2193413348","https://openalex.org/W2339754110","https://openalex.org/W2471520273","https://openalex.org/W2474531669","https://openalex.org/W2518172956","https://openalex.org/W2518312472","https://openalex.org/W2532494225","https://openalex.org/W2593414223","https://openalex.org/W2598581049","https://openalex.org/W2747744257","https://openalex.org/W2774848319","https://openalex.org/W2784823820","https://openalex.org/W2804998325","https://openalex.org/W2896457183","https://openalex.org/W2897353073","https://openalex.org/W2899877258","https://openalex.org/W2902070858","https://openalex.org/W2937579788","https://openalex.org/W2945478979","https://openalex.org/W2946555236","https://openalex.org/W2951939904","https://openalex.org/W2962793481","https://openalex.org/W2962896155","https://openalex.org/W2963035245","https://openalex.org/W2963300588","https://openalex.org/W2963341956","https://openalex.org/W2963420272","https://openalex.org/W2963444790","https://openalex.org/W2963539064","https://openalex.org/W2964121744","https://openalex.org/W2964243274","https://openalex.org/W2964341837","https://openalex.org/W2970006822","https://openalex.org/W2972544500","https://openalex.org/W2972667718","https://openalex.org/W2975414524","https://openalex.org/W2996286887","https://openalex.org/W2996414377","https://openalex.org/W3015338123","https://openalex.org/W3015959238","https://openalex.org/W3034420534","https://openalex.org/W3095936335","https://openalex.org/W3099078140","https://openalex.org/W3100696337","https://openalex.org/W3101689408","https://openalex.org/W3123097577","https://openalex.org/W4293398859","https://openalex.org/W4320013936","https://openalex.org/W6631190155","https://openalex.org/W6640963894","https://openalex.org/W6674330103","https://openalex.org/W6687566353","https://openalex.org/W6703724764","https://openalex.org/W6730095352","https://openalex.org/W6735204497","https://openalex.org/W6746801104","https://openalex.org/W6755207826","https://openalex.org/W6762533536","https://openalex.org/W6767111847","https://openalex.org/W6782760101"],"related_works":["https://openalex.org/W2029561777","https://openalex.org/W1554502231","https://openalex.org/W172797710","https://openalex.org/W2945105049","https://openalex.org/W2626699140","https://openalex.org/W4387098302","https://openalex.org/W3165080709","https://openalex.org/W2948317131","https://openalex.org/W2897924318","https://openalex.org/W2138997758"],"abstract_inverted_index":{"Non-parallel":[0],"voice":[1,9],"conversion":[2,44,48],"(VC)":[3],"is":[4,40,86,95,101],"a":[5,12,104,117,147,179],"technique":[6],"for":[7,154],"training":[8],"converters":[10],"without":[11],"parallel":[13],"corpus.":[14],"Cycle-consistent":[15],"adversarial":[16],"network-based":[17],"VCs":[18],"(CycleGAN-VC":[19],"and":[20,45,100,124,150,167,176],"CycleGAN-VC2)":[21],"are":[22],"widely":[23],"accepted":[24],"as":[25,159],"benchmark":[26],"methods.":[27],"However,":[28,77],"owing":[29],"to":[30,34,42,120,128,142,183],"their":[31,38],"insufficient":[32],"ability":[33],"grasp":[35],"time-frequency":[36,70,144],"structures,":[37],"application":[39],"limited":[41],"mel-cepstrum":[43],"not":[46],"mel-spectrogram":[47,53,123],"despite":[49],"recent":[50],"advances":[51],"in":[52,80,110,130,146],"vocoders.":[54],"To":[55],"overcome":[56],"this,":[57],"CycleGAN-VC3,":[58],"an":[59,66,78,89,155],"improved":[60],"variant":[61],"of":[62,83,98,164,185],"CycleGAN-VC2":[63,99,175],"that":[64,171,184],"incorporates":[65],"additional":[67,156],"module":[68,157],"called":[69,108],"adaptive":[71],"normalization":[72],"(TFAN),":[73],"has":[74],"been":[75],"proposed.":[76],"increase":[79],"the":[81,121,126,140,152,165],"number":[82],"learned":[84],"parameters":[85],"imposed.":[87],"As":[88],"alternative,":[90],"we":[91,115],"propose":[92],"MaskCycleGAN-VC,":[93],"which":[94],"another":[96],"extension":[97],"trained":[102],"using":[103],"novel":[105],"auxiliary":[106],"task":[107,138],"filling":[109],"frames":[111,132],"(FIF).":[112],"With":[113],"FIF,":[114],"apply":[116],"temporal":[118],"mask":[119],"input":[122],"encourage":[125],"converter":[127,141],"fill":[129],"missing":[131],"based":[133],"on":[134],"surrounding":[135],"frames.":[136],"This":[137],"allows":[139],"learn":[143],"structures":[145],"self-supervised":[148],"manner":[149],"eliminates":[151],"need":[153],"such":[158],"TFAN.":[160],"A":[161],"subjective":[162],"evaluation":[163],"naturalness":[166],"speaker":[168],"similarity":[169],"showed":[170],"MaskCycleGAN-VC":[172],"outperformed":[173],"both":[174],"CycleGAN-VC3":[177],"with":[178],"model":[180],"size":[181],"similar":[182],"CycleGAN-VC2.":[186],"<sup":[187],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[188],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[189]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":22},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":4}],"updated_date":"2026-05-09T13:55:54.758798","created_date":"2025-10-10T00:00:00"}
