{"id":"https://openalex.org/W3016081413","doi":"https://doi.org/10.1109/icassp40776.2020.9054010","title":"An Improved Frame-Unit-Selection Based Voice Conversion System Without Parallel Training Data","display_name":"An Improved Frame-Unit-Selection Based Voice Conversion System Without Parallel Training Data","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3016081413","doi":"https://doi.org/10.1109/icassp40776.2020.9054010","mag":"3016081413"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054010","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054010","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101692260","display_name":"Fenglong Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Feng-Long Xie","raw_affiliation_strings":["Tencent, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100701662","display_name":"Xinhui Li","orcid":"https://orcid.org/0000-0002-8398-8741"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin-Hui Li","raw_affiliation_strings":["Tencent, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114377956","display_name":"Bo Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Liu","raw_affiliation_strings":["Tencent, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101398585","display_name":"Yibin Zheng","orcid":"https://orcid.org/0000-0001-9158-1813"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi-Bin Zheng","raw_affiliation_strings":["Tencent, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101559947","display_name":"Meng Li","orcid":"https://orcid.org/0000-0003-1019-6118"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Meng","raw_affiliation_strings":["Tencent, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100658678","display_name":"Li L\u00fc","orcid":"https://orcid.org/0000-0001-5230-3749"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Lu","raw_affiliation_strings":["Tencent, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065394791","display_name":"Frank K. Soong","orcid":"https://orcid.org/0000-0002-9088-3577"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Frank K. Soong","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101692260"],"corresponding_institution_ids":["https://openalex.org/I2250653659"],"apc_list":null,"apc_paid":null,"fwci":0.1326,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.51705788,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":"22","issue":null,"first_page":"7754","last_page":"7758"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7918823957443237},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6380817890167236},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6225287914276123},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6002739071846008},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5262381434440613},{"id":"https://openalex.org/keywords/unit","display_name":"Unit (ring theory)","score":0.46253255009651184},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4184172451496124},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3457031846046448},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.14040344953536987}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7918823957443237},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6380817890167236},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6225287914276123},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6002739071846008},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5262381434440613},{"id":"https://openalex.org/C122637931","wikidata":"https://www.wikidata.org/wiki/Q118084","display_name":"Unit (ring theory)","level":2,"score":0.46253255009651184},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4184172451496124},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3457031846046448},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.14040344953536987},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054010","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054010","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7599999904632568}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1509691205","https://openalex.org/W1520370180","https://openalex.org/W1965555277","https://openalex.org/W1974745215","https://openalex.org/W1977362459","https://openalex.org/W2032130465","https://openalex.org/W2035846950","https://openalex.org/W2049686551","https://openalex.org/W2086796102","https://openalex.org/W2120605154","https://openalex.org/W2123771434","https://openalex.org/W2135832479","https://openalex.org/W2152974894","https://openalex.org/W2156142001","https://openalex.org/W2157412983","https://openalex.org/W2161476805","https://openalex.org/W2294246205","https://openalex.org/W2515439472","https://openalex.org/W2518172956","https://openalex.org/W2518312472","https://openalex.org/W2519091744","https://openalex.org/W2746474733","https://openalex.org/W2749651610","https://openalex.org/W2785608393","https://openalex.org/W2889329491","https://openalex.org/W2890402938","https://openalex.org/W2897353073","https://openalex.org/W2899877258","https://openalex.org/W2902844798","https://openalex.org/W2949382160","https://openalex.org/W2962896155","https://openalex.org/W2963035245","https://openalex.org/W2963091184","https://openalex.org/W2963808252","https://openalex.org/W2964243274","https://openalex.org/W2964307104","https://openalex.org/W2972519044","https://openalex.org/W2972574864","https://openalex.org/W3099078140","https://openalex.org/W3177989406","https://openalex.org/W4298580827","https://openalex.org/W6697270646","https://openalex.org/W6748409065","https://openalex.org/W6798679566","https://openalex.org/W7009720900"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W3216976533","https://openalex.org/W100620283","https://openalex.org/W2495260952","https://openalex.org/W4394050964","https://openalex.org/W2551249631"],"abstract_inverted_index":{"A":[0],"frame-unit-selection":[1],"based":[2],"voice":[3],"conversion":[4],"system":[5,152,160],"proposed":[6],"earlier":[7],"by":[8,102],"us":[9],"is":[10,44,55,100],"revisited":[11],"here":[12],"to":[13,46,90,107,140,143,167,178],"enhance":[14],"its":[15],"performance":[16,164,175],"in":[17,153,184],"both":[18,185],"speech":[19,120,186],"naturalness":[20,187],"and":[21,29,66,71,78,121,134,173,189],"speaker":[22,190],"similarity.":[23],"Speaker":[24],"independent,":[25],"bilingual":[26],"(Mandarin":[27],"Chinese":[28,181],"American":[30,169],"English)":[31],"deep":[32],"neural":[33],"net":[34],"(DNN)":[35],"acoustic":[36,111,115],"model\u2019s":[37],"output,":[38],"frame-level":[39,53],"phone":[40],"posterior":[41],"probability":[42],"(PPP),":[43],"used":[45,56,84],"represent":[47],"the":[48,58,72,109,114,118,130,145,150],"phonetic":[49],"information.":[50,60],"The":[51,95,124],"corresponding":[52],"F0":[54,80],"as":[57],"prosodic":[59],"Kullback-Leibler":[61],"divergence":[62],"(KLD)":[63],"between":[64,75,113],"source":[65,77,119],"target":[67,79,87,97,122],"PPPs":[68],"(phonetic":[69],"distortion)":[70,82],"absolute":[73],"difference":[74,112],"normalized":[76],"(prosodic":[81],"are":[83,138],"for":[85],"selecting":[86],"frame":[88],"candidates":[89],"construct":[91],"a":[92],"search":[93],"lattice.":[94],"optimal":[96],"unit":[98],"trajectory":[99,116,127,137],"obtained":[101,125],"Viterbi":[103],"algorithm":[104],"which":[105],"tries":[106],"minimize":[108],"dynamic":[110],"of":[117],"candidates.":[123],"spectral":[126],"together":[128],"with":[129,149],"enhanced":[131],"pitch":[132,135],"period":[133],"correlation":[136],"sent":[139],"LPCNet":[141],"vocoder":[142],"synthesize":[144],"converted":[146],"waveforms.":[147],"Compared":[148],"top-rank":[151],"Voice":[154],"Conversion":[155],"Challenge":[156],"2018,":[157],"our":[158],"new":[159],"can":[161],"achieve":[162],"on-par":[163],"on":[165,176],"studio":[166,168,179],"English":[170],"VC":[171,182],"test,":[172,183],"better":[174],"non-studio":[177],"Mandarin":[180],"MOS":[188],"similarity":[191],"DMOS.":[192]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
