{"id":"https://openalex.org/W4416251335","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227796","title":"Evaluating Robustness of Voice Conversion Systems under Multi-source Channel Interference","display_name":"Evaluating Robustness of Voice Conversion Systems under Multi-source Channel Interference","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416251335","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227796"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11227796","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227796","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007508388","display_name":"Qianniu Chen","orcid":"https://orcid.org/0000-0002-3495-4369"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qianniu Chen","raw_affiliation_strings":["Zhejiang University,State Key Laboratory of Blockchain and Data Security, College of Computer Science and Technology"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,State Key Laboratory of Blockchain and Data Security, College of Computer Science and Technology","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009280540","display_name":"Xiaodi Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodi Zhao","raw_affiliation_strings":["Zhejiang University,State Key Laboratory of Blockchain and Data Security, College of Computer Science and Technology"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,State Key Laboratory of Blockchain and Data Security, College of Computer Science and Technology","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070163784","display_name":"Z. P. Gu","orcid":null},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhehan Gu","raw_affiliation_strings":["Zhejiang University,State Key Laboratory of Blockchain and Data Security, College of Computer Science and Technology"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,State Key Laboratory of Blockchain and Data Security, College of Computer Science and Technology","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102764683","display_name":"Xiao Li","orcid":"https://orcid.org/0000-0003-0186-1788"},"institutions":[{"id":"https://openalex.org/I4210156488","display_name":"Institute of Zoology of the Slovak Academy of Sciences","ror":"https://ror.org/05tm6j853","country_code":"SK","type":"facility","lineage":["https://openalex.org/I207624831","https://openalex.org/I4210156488"]}],"countries":["SK"],"is_corresponding":false,"raw_author_name":"Xiao Li","raw_affiliation_strings":["Institute of Blockchain and Data Security,Hangzhou High-Tech Zone (Binjiang)"],"affiliations":[{"raw_affiliation_string":"Institute of Blockchain and Data Security,Hangzhou High-Tech Zone (Binjiang)","institution_ids":["https://openalex.org/I4210156488"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100386363","display_name":"Lu Li","orcid":"https://orcid.org/0000-0003-4573-8538"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Lu","raw_affiliation_strings":["Zhejiang University,State Key Laboratory of Blockchain and Data Security, College of Computer Science and Technology"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,State Key Laboratory of Blockchain and Data Security, College of Computer Science and Technology","institution_ids":["https://openalex.org/I55712492"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5007508388"],"corresponding_institution_ids":["https://openalex.org/I55712492"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.42129321,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.5658000111579895,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.5658000111579895,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.1738000065088272,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.08089999854564667,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.6195999979972839},{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.5961999893188477},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5317999720573425},{"id":"https://openalex.org/keywords/interference","display_name":"Interference (communication)","score":0.5293999910354614},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5005999803543091},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3375999927520752},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.3082999885082245},{"id":"https://openalex.org/keywords/transmission","display_name":"Transmission (telecommunications)","score":0.30410000681877136}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7347000241279602},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.6195999979972839},{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.5961999893188477},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5317999720573425},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.5293999910354614},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5188999772071838},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5005999803543091},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.35519999265670776},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3375999927520752},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.3082999885082245},{"id":"https://openalex.org/C761482","wikidata":"https://www.wikidata.org/wiki/Q118093","display_name":"Transmission (telecommunications)","level":2,"score":0.30410000681877136},{"id":"https://openalex.org/C137246740","wikidata":"https://www.wikidata.org/wiki/Q583970","display_name":"Spectral efficiency","level":3,"score":0.29899999499320984},{"id":"https://openalex.org/C2983848023","wikidata":"https://www.wikidata.org/wiki/Q733553","display_name":"Transmission channel","level":3,"score":0.28679999709129333},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C2778192920","wikidata":"https://www.wikidata.org/wiki/Q16874989","display_name":"Signal compression","level":4,"score":0.27489998936653137},{"id":"https://openalex.org/C101765175","wikidata":"https://www.wikidata.org/wiki/Q577764","display_name":"Communications system","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C557945733","wikidata":"https://www.wikidata.org/wiki/Q389772","display_name":"Data transmission","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.25369998812675476},{"id":"https://openalex.org/C2778116611","wikidata":"https://www.wikidata.org/wiki/Q25110567","display_name":"Frequency band","level":3,"score":0.2515000104904175},{"id":"https://openalex.org/C6532628","wikidata":"https://www.wikidata.org/wiki/Q5137431","display_name":"Co-channel interference","level":4,"score":0.25110000371932983},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11227796","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227796","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2057609679","https://openalex.org/W2126143605","https://openalex.org/W2156142001","https://openalex.org/W2294246205","https://openalex.org/W2294351487","https://openalex.org/W2519091744","https://openalex.org/W2763188033","https://openalex.org/W2972659941","https://openalex.org/W3024869864","https://openalex.org/W3143523927","https://openalex.org/W3168719651","https://openalex.org/W3196667132","https://openalex.org/W3206503703","https://openalex.org/W3213319487","https://openalex.org/W4226320669","https://openalex.org/W4283215219","https://openalex.org/W4317926938","https://openalex.org/W4372260053","https://openalex.org/W4391021367","https://openalex.org/W4393156644","https://openalex.org/W4411337830"],"related_works":[],"abstract_inverted_index":{"Voice":[0],"Conversion":[1],"(VC)":[2],"technology":[3],"holds":[4],"significant":[5,66],"potential":[6],"for":[7],"enhancing":[8],"communication":[9],"across":[10,103,169],"varied":[11],"application":[12,74],"scenarios,":[13],"such":[14],"as":[15],"voice":[16],"chats,":[17],"video":[18],"conferencing,":[19],"and":[20,32,71,105,115,118,145,157,196,199],"VTuber":[21],"live":[22],"streaming.":[23],"During":[24],"the":[25,43,53,89,140],"VC":[26,46,60,69,96,131,172,177],"systems\u2019":[27],"use,":[28],"there":[29],"is":[30],"inevitable":[31],"complex":[33],"channel":[34,93,101],"interference,":[35,186],"which":[36,62],"becomes":[37],"a":[38,65,82],"key":[39],"factor":[40],"in":[41,133],"downgrading":[42],"performance":[44],"of":[45,56,91,121,135],"systems.":[47,97],"However,":[48],"few":[49],"studies":[50],"comprehensively":[51],"reveal":[52],"specific":[54],"impact":[55],"these":[57],"interferences":[58],"on":[59,95,163],"systems,":[61,173],"gradually":[63],"become":[64],"gap":[67],"between":[68],"design":[70],"its":[72],"landing":[73],"requirements.":[75],"Toward":[76],"this":[77,79],"end,":[78],"paper":[80],"proposes":[81],"comprehensive":[83],"evaluation":[84,126],"framework":[85,129],"to":[86],"systematically":[87],"analyze":[88],"effects":[90],"multi-source":[92,100],"interference":[94,102,182],"We":[98],"investigate":[99],"physical":[104,185],"digital":[106,181],"domains,":[107],"including":[108],"noise,":[109],"reverberation,":[110],"device":[111],"distortion,":[112],"codec":[113],"compression":[114],"transmission":[116],"loss,":[117],"integrate":[119],"all":[120],"them":[122],"into":[123],"our":[124],"proposed":[125],"framework.":[127],"The":[128],"assesses":[130],"systems":[132],"terms":[134],"three":[136,150],"complementary":[137],"dimensions,":[138],"i.e.,":[139],"speaker":[141],"timbre,":[142],"speech":[143],"semantics,":[144],"signal":[146],"consistency,":[147],"by":[148],"introducing":[149],"core":[151],"metrics:":[152],"timbre":[153,189],"similarity,":[154,156],"semantic":[155],"acoustic":[158],"similarity.":[159],"Through":[160],"extensive":[161],"experiments":[162],"44,000":[164],"samples":[165],"from":[166],"109":[167],"speakers":[168],"six":[170],"representative":[171],"we":[174],"find":[175],"that":[176],"systems:":[178],"1)":[179],"handle":[180],"better":[183],"than":[184],"2)":[187],"maintain":[188],"features":[190],"well":[191],"but":[192],"struggle":[193],"with":[194],"semantics":[195],"sound":[197],"quality,":[198],"3)":[200],"could":[201],"achieve":[202],"robust":[203],"conversion":[204],"through":[205],"feature":[206],"disentanglement.":[207]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-14T00:00:00"}
