{"id":"https://openalex.org/W4408345783","doi":"https://doi.org/10.1109/icassp49660.2025.10887732","title":"SYKI-SVC: Advancing Singing Voice Conversion with Post-Processing Innovations and an Open-Source Professional Testset","display_name":"SYKI-SVC: Advancing Singing Voice Conversion with Post-Processing Innovations and an Open-Source Professional Testset","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408345783","doi":"https://doi.org/10.1109/icassp49660.2025.10887732"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10887732","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887732","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102566429","display_name":"Yiquan Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yiquan Zhou","raw_affiliation_strings":["Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067816529","display_name":"Wenyu Wang","orcid":"https://orcid.org/0000-0001-6772-1839"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenyu Wang","raw_affiliation_strings":["Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113430879","display_name":"Hongwu Ding","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hongwu Ding","raw_affiliation_strings":["AI Center Speech Group Happy Elements,China"],"affiliations":[{"raw_affiliation_string":"AI Center Speech Group Happy Elements,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100622261","display_name":"Jiacheng Xu","orcid":"https://orcid.org/0000-0002-9292-6999"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiacheng Xu","raw_affiliation_strings":["East China Normal University,School of Software Engineering,China"],"affiliations":[{"raw_affiliation_string":"East China Normal University,School of Software Engineering,China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068185614","display_name":"Jihua Zhu","orcid":"https://orcid.org/0000-0002-3081-8781"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jihua Zhu","raw_affiliation_strings":["Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072992339","display_name":"Xin Gao","orcid":"https://orcid.org/0000-0002-3367-3725"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin Gao","raw_affiliation_strings":["Union Wheatland Culture and Media Ltd.,Division of Music and Audio,Chengdu,China"],"affiliations":[{"raw_affiliation_string":"Union Wheatland Culture and Media Ltd.,Division of Music and Audio,Chengdu,China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031781175","display_name":"Shihao Li","orcid":"https://orcid.org/0000-0001-8688-3120"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shihao Li","raw_affiliation_strings":["Union Wheatland Culture and Media Ltd.,Division of Music and Audio,Chengdu,China"],"affiliations":[{"raw_affiliation_string":"Union Wheatland Culture and Media Ltd.,Division of Music and Audio,Chengdu,China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102566429"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":4.3637,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93360702,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9307000041007996,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9258999824523926,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.7748996019363403},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.6927923560142517},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6733751893043518},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5229740142822266},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.15501675009727478},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.08323964476585388},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.07905599474906921}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.7748996019363403},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.6927923560142517},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6733751893043518},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5229740142822266},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.15501675009727478},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.08323964476585388},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.07905599474906921},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10887732","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887732","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2067709094","https://openalex.org/W2963300588","https://openalex.org/W2963539064","https://openalex.org/W2972667718","https://openalex.org/W3083423753","https://openalex.org/W3095936335","https://openalex.org/W3162512456","https://openalex.org/W3207340675","https://openalex.org/W3209059054","https://openalex.org/W3210530853","https://openalex.org/W4247282941","https://openalex.org/W4296068763","https://openalex.org/W4312806563","https://openalex.org/W4372338328","https://openalex.org/W4385245566","https://openalex.org/W4385822304","https://openalex.org/W4391021724","https://openalex.org/W4391021772","https://openalex.org/W4391021798","https://openalex.org/W6772349387","https://openalex.org/W6783867762","https://openalex.org/W6796464841","https://openalex.org/W6839738141","https://openalex.org/W6847363464"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390529913","https://openalex.org/W2142368101","https://openalex.org/W2372249404","https://openalex.org/W2367547137","https://openalex.org/W2354994102","https://openalex.org/W2387733758","https://openalex.org/W2376664795"],"abstract_inverted_index":{"Singing":[0],"voice":[1,9,34,55,84],"conversion":[2,35,133],"aims":[3],"to":[4,69,95,116,121],"transform":[5],"a":[6,13,31,51,54,58,125,142,153],"source":[7,109],"singing":[8,33,81,132],"into":[10],"that":[11,149],"of":[12,47,124,157,165],"target":[14,98],"singer":[15],"while":[16],"preserving":[17],"the":[18,41,64,79,88,97,108,122,163],"original":[19],"lyrics,":[20],"melody,":[21],"and":[22,45,57,66,73,92,112,138,159],"various":[23],"vocal":[24],"techniques.":[25],"In":[26],"this":[27],"paper,":[28],"we":[29,135],"propose":[30],"high-fidelity":[32],"system.":[36],"Our":[37],"system":[38,151,168],"builds":[39],"upon":[40],"SVCC":[42],"T02":[43],"framework":[44],"consists":[46],"three":[48],"key":[49],"components:":[50],"feature":[52,61],"extractor,":[53],"converter,":[56],"post-processor.":[59],"The":[60,83,101],"extractor":[62],"utilizes":[63],"ContentVec":[65],"Whisper":[67],"models":[68],"derive":[70],"F0":[71],"contours":[72],"extract":[74],"speaker-independent":[75],"linguistic":[76,93],"features":[77],"from":[78,107],"input":[80],"voice.":[82],"converter":[85],"then":[86],"integrates":[87],"extracted":[89],"timbre,":[90],"F0,":[91],"content":[94],"synthesize":[96],"speaker\u2019s":[99],"waveform.":[100],"post-processor":[102],"augments":[103],"high-frequency":[104],"information":[105],"directly":[106],"through":[110],"simple":[111],"effective":[113],"signal":[114],"processing":[115],"enhance":[117],"audio":[118],"quality.":[119],"Due":[120],"lack":[123],"standardized":[126],"professional":[127],"dataset":[128],"for":[129],"evaluating":[130],"expressive":[131],"systems,":[134],"have":[136],"created":[137],"made":[139],"publicly":[140],"available":[141],"specialized":[143],"test":[144],"set.":[145],"Comparative":[146],"evaluations":[147],"demonstrate":[148],"our":[150,166],"achieves":[152],"remarkably":[154],"high":[155],"level":[156],"naturalness,":[158],"further":[160],"analysis":[161],"confirms":[162],"efficacy":[164],"proposed":[167],"design.":[169]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
