{"id":"https://openalex.org/W4224928197","doi":"https://doi.org/10.1109/icassp43922.2022.9747140","title":"Improving Recognition-Synthesis Based any-to-one Voice Conversion with Cyclic Training","display_name":"Improving Recognition-Synthesis Based any-to-one Voice Conversion with Cyclic Training","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4224928197","doi":"https://doi.org/10.1109/icassp43922.2022.9747140"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747140","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747140","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030295554","display_name":"Yan\u2010Nian Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yan-Nian Chen","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Laboratory for Speech and Language Information Processing,Hefei,P.R. China","National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, P.R. China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Laboratory for Speech and Language Information Processing,Hefei,P.R. China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, P.R. China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100343789","display_name":"Lijuan Liu","orcid":"https://orcid.org/0009-0007-0163-7336"},"institutions":[{"id":"https://openalex.org/I4210090140","display_name":"IFU (Germany)","ror":"https://ror.org/00ee3x048","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210090140"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Li-Juan Liu","raw_affiliation_strings":["iFLYTEK Co., Ltd.,iFLYTEK Research,Hefei,P.R. China","iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R. China"],"affiliations":[{"raw_affiliation_string":"iFLYTEK Co., Ltd.,iFLYTEK Research,Hefei,P.R. China","institution_ids":["https://openalex.org/I4210090140"]},{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059516164","display_name":"Ya-Jun Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090140","display_name":"IFU (Germany)","ror":"https://ror.org/00ee3x048","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210090140"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ya-Jun Hu","raw_affiliation_strings":["iFLYTEK Co., Ltd.,iFLYTEK Research,Hefei,P.R. China","iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R. China"],"affiliations":[{"raw_affiliation_string":"iFLYTEK Co., Ltd.,iFLYTEK Research,Hefei,P.R. China","institution_ids":["https://openalex.org/I4210090140"]},{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064657549","display_name":"Yuan Jiang","orcid":"https://orcid.org/0000-0003-4307-0562"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Jiang","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Laboratory for Speech and Language Information Processing,Hefei,P.R. China","National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, P.R. China","iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R. China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Laboratory for Speech and Language Information Processing,Hefei,P.R. China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, P.R. China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd., Hefei, P.R. China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen-Hua Ling","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Laboratory for Speech and Language Information Processing,Hefei,P.R. China","National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, P.R. China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Laboratory for Speech and Language Information Processing,Hefei,P.R. China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, P.R. China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5030295554"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":1.0394,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.77687998,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"7007","last_page":"7011"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7671436071395874},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.741489052772522},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6058480143547058},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5846167206764221},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5476552248001099},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.49360740184783936},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4107874631881714},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38963091373443604}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7671436071395874},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.741489052772522},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6058480143547058},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5846167206764221},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5476552248001099},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.49360740184783936},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4107874631881714},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38963091373443604},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747140","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747140","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320330944","display_name":"Nature","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1509691205","https://openalex.org/W1522301498","https://openalex.org/W1972420736","https://openalex.org/W2004299580","https://openalex.org/W2049686551","https://openalex.org/W2100819376","https://openalex.org/W2118154032","https://openalex.org/W2120605154","https://openalex.org/W2121387787","https://openalex.org/W2126143605","https://openalex.org/W2157412983","https://openalex.org/W2401544731","https://openalex.org/W2518172956","https://openalex.org/W2526425061","https://openalex.org/W2532494225","https://openalex.org/W2804998325","https://openalex.org/W2888922217","https://openalex.org/W2889329491","https://openalex.org/W2897353073","https://openalex.org/W2902070858","https://openalex.org/W2938583109","https://openalex.org/W2963539064","https://openalex.org/W2972399707","https://openalex.org/W3015338123","https://openalex.org/W3046998876","https://openalex.org/W3097001834","https://openalex.org/W3099078140","https://openalex.org/W3101689408","https://openalex.org/W4245885054","https://openalex.org/W4249468441","https://openalex.org/W6623517193","https://openalex.org/W6631190155","https://openalex.org/W6726528559","https://openalex.org/W6766442783","https://openalex.org/W6781427223","https://openalex.org/W6785457655"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W4313854686","https://openalex.org/W2499802997","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W289407349","https://openalex.org/W2029134149","https://openalex.org/W2368768466","https://openalex.org/W2757081366"],"abstract_inverted_index":{"In":[0],"recognition-synthesis":[1],"based":[2],"any-to-one":[3],"voice":[4],"conversion":[5,43,75],"(VC),":[6],"an":[7],"automatic":[8],"speech":[9],"recognition":[10],"(ASR)":[11],"model":[12],"is":[13,22,94,155],"employed":[14],"to":[15,24,56,65,141],"extract":[16],"content-related":[17,35,59],"features":[18,28,36,60,128,145],"and":[19,76,150,194],"a":[20,90,121,151],"synthesizer":[21,136],"built":[23],"predict":[25,142],"the":[26,30,34,42,51,58,62,67,70,80,110,114,132,135,138,143,147,177,190],"acoustic":[27,103,127,144,169],"of":[29,37,61,69,83,113,134,146,168,185,192],"target":[31,63,115,148,195],"speaker":[32,64,81,116,149,199],"from":[33],"any":[38],"source":[39,46,174,193],"speakers":[40,47,119,175,196],"at":[41,50,137],"stage.":[44],"Since":[45],"are":[48,106,129],"unknown":[49],"training":[52,77,92,111,139],"stage,":[53],"we":[54],"have":[55],"use":[57],"estimate":[66],"parameters":[68],"synthesizer.":[71],"This":[72,99],"inconsistency":[73],"between":[74],"stages":[78],"constrains":[79],"similarity":[82,184],"converted":[84,186],"speech.":[85],"To":[86],"address":[87],"this":[88,97],"issue,":[89],"cyclic":[91,152],"method":[93,100,163],"proposed":[95,162],"in":[96,120],"paper.":[98],"designs":[101],"pseudo-source":[102,126],"features,":[104],"which":[105],"generated":[107],"by":[108],"converting":[109],"data":[112],"towards":[117],"multiple":[118],"reference":[122],"corpus.":[123],"Then,":[124],"these":[125],"used":[130],"as":[131],"input":[133],"stage":[140],"reconstruction":[153],"loss":[154],"derived.":[156],"Experimental":[157],"results":[158],"show":[159],"that":[160],"our":[161],"achieved":[164,182],"more":[165],"consistent":[166],"accuracy":[167],"feature":[170],"prediction":[171],"for":[172,189],"various":[173],"than":[176],"baseline":[178],"method.":[179],"It":[180],"also":[181],"better":[183],"speech,":[187],"especially":[188],"pairs":[191],"with":[197],"distant":[198],"characteristics.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
